openlit 1.29.1__py3-none-any.whl → 1.30.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +14 -0
- openlit/__init__.py +11 -3
- openlit/evals/utils.py +5 -2
- openlit/guard/utils.py +5 -1
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +0 -1
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +0 -1
- openlit/instrumentation/crewai/__init__.py +50 -0
- openlit/instrumentation/crewai/crewai.py +149 -0
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +0 -1
- openlit/instrumentation/litellm/__init__.py +54 -0
- openlit/instrumentation/litellm/async_litellm.py +406 -0
- openlit/instrumentation/litellm/litellm.py +406 -0
- openlit/instrumentation/openai/async_openai.py +236 -170
- openlit/instrumentation/openai/openai.py +208 -156
- openlit/otel/metrics.py +6 -3
- openlit/otel/tracing.py +5 -2
- openlit/semcov/__init__.py +21 -0
- {openlit-1.29.1.dist-info → openlit-1.30.2.dist-info}/METADATA +11 -11
- {openlit-1.29.1.dist-info → openlit-1.30.2.dist-info}/RECORD +21 -16
- {openlit-1.29.1.dist-info → openlit-1.30.2.dist-info}/LICENSE +0 -0
- {openlit-1.29.1.dist-info → openlit-1.30.2.dist-info}/WHEEL +0 -0
openlit/instrumentation/openai/openai.py
CHANGED

```diff
@@ -6,8 +6,15 @@ Module for monitoring OpenAI API calls.
 import logging
 from opentelemetry.trace import SpanKind, Status, StatusCode
 from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
-from openlit.__helpers import
-
+from openlit.__helpers import (
+    get_chat_model_cost,
+    get_embed_model_cost,
+    get_audio_model_cost,
+    get_image_model_cost,
+    openai_tokens,
+    handle_exception,
+    response_as_dict,
+)
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
@@ -31,10 +38,184 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
     A function that wraps the chat completions method to add telemetry.
     """
 
+    class TracedSyncStream:
+        """
+        Wrapper for streaming responses to collect metrics and trace data.
+        Wraps the 'openai.AsyncStream' response to collect message IDs and aggregated response.
+
+        This class implements the '__aiter__' and '__anext__' methods that
+        handle asynchronous streaming responses.
+
+        This class also implements '__aenter__' and '__aexit__' methods that
+        handle asynchronous context management protocol.
+        """
+        def __init__(
+                self,
+                wrapped,
+                span,
+                kwargs,
+                **args,
+            ):
+            self.__wrapped__ = wrapped
+            self._span = span
+            # Placeholder for aggregating streaming response
+            self._llmresponse = ""
+            self._response_id = ""
+
+            self._args = args
+            self._kwargs = kwargs
+
+        def __enter__(self):
+            self.__wrapped__.__enter__()
+            return self
+
+        def __exit__(self, exc_type, exc_value, traceback):
+            self.__wrapped__.__exit__(exc_type, exc_value, traceback)
+
+        def __iter__(self):
+            return self
+
+        def __getattr__(self, name):
+            """Delegate attribute access to the wrapped object."""
+            return getattr(self.__wrapped__, name)
+
+        def __next__(self):
+            try:
+                chunk = self.__wrapped__.__next__()
+                chunked = response_as_dict(chunk)
+                # Collect message IDs and aggregated response from events
+                if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
+                        'content' in chunked.get('choices')[0].get('delta'))):
+
+                    content = chunked.get('choices')[0].get('delta').get('content')
+                    if content:
+                        self._llmresponse += content
+                self._response_id = chunked.get('id')
+                return chunk
+            except StopIteration:
+                # Handling exception ensure observability without disrupting operation
+                try:
+                    # Format 'messages' into a single string
+                    message_prompt = self._kwargs.get("messages", "")
+                    formatted_messages = []
+                    for message in message_prompt:
+                        role = message["role"]
+                        content = message["content"]
+
+                        if isinstance(content, list):
+                            content_str = ", ".join(
+                                # pylint: disable=line-too-long
+                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                                if "type" in item else f'text: {item["text"]}'
+                                for item in content
+                            )
+                            formatted_messages.append(f"{role}: {content_str}")
+                        else:
+                            formatted_messages.append(f"{role}: {content}")
+                    prompt = "\n".join(formatted_messages)
+
+                    # Calculate tokens using input prompt and aggregated response
+                    prompt_tokens = openai_tokens(prompt,
+                                                  self._kwargs.get("model", "gpt-3.5-turbo"))
+                    completion_tokens = openai_tokens(self._llmresponse,
+                                                      self._kwargs.get("model", "gpt-3.5-turbo"))
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(self._kwargs.get("model", "gpt-3.5-turbo"),
+                                               pricing_info, prompt_tokens,
+                                               completion_tokens)
+
+                    # Set Span attributes
+                    self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                             SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                             SemanticConvetion.GEN_AI_TYPE_CHAT)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                             gen_ai_endpoint)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                             self._response_id)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                             environment)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                             application_name)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                             self._kwargs.get("model", "gpt-3.5-turbo"))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                             self._kwargs.get("user", ""))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                             self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                             self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                             self._kwargs.get("temperature", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                             self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                             self._kwargs.get("frequency_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                             self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                             True)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                             prompt_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                             completion_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                             prompt_tokens + completion_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                             cost)
+                    if trace_content:
+                        self._span.add_event(
+                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                            attributes={
+                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                            },
+                        )
+                        self._span.add_event(
+                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                            attributes={
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
+                            },
+                        )
+
+                    self._span.set_status(Status(StatusCode.OK))
+
+                    if disable_metrics is False:
+                        attributes = {
+                            TELEMETRY_SDK_NAME:
+                                "openlit",
+                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                application_name,
+                            SemanticConvetion.GEN_AI_SYSTEM:
+                                SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                            SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                environment,
+                            SemanticConvetion.GEN_AI_TYPE:
+                                SemanticConvetion.GEN_AI_TYPE_CHAT,
+                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                self._kwargs.get("model", "gpt-3.5-turbo")
+                        }
+
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_total_tokens"].add(
+                            prompt_tokens + completion_tokens, attributes
+                        )
+                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                        metrics["genai_cost"].record(cost, attributes)
+
+                except Exception as e:
+                    handle_exception(self._span, e)
+                    logger.error("Error in trace creation: %s", e)
+                finally:
+                    self._span.end()
+                raise
+
     def wrapper(wrapped, instance, args, kwargs):
         """
         Wraps the 'chat.completions' API call to add telemetry.
-
+
         This collects metrics such as execution time, cost, and token usage, and handles errors
         gracefully, adding details to the trace for observability.
 
@@ -54,141 +235,10 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
         # pylint: disable=no-else-return
         if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
-
-
-                    # Placeholder for aggregating streaming response
-                    llmresponse = ""
-
-
-                    # Loop through streaming events capturing relevant details
-                    for chunk in wrapped(*args, **kwargs):
-                        # Collect message IDs and aggregated response from events
-                        if len(chunk.choices) > 0:
-                            # pylint: disable=line-too-long
-                            if hasattr(chunk.choices[0], "delta") and hasattr(chunk.choices[0].delta, "content"):
-                                content = chunk.choices[0].delta.content
-                                if content:
-                                    llmresponse += content
-                        yield chunk
-                    response_id = chunk.id
-
-                    # Handling exception ensure observability without disrupting operation
-                    try:
-                        # Format 'messages' into a single string
-                        message_prompt = kwargs.get("messages", "")
-                        formatted_messages = []
-                        for message in message_prompt:
-                            role = message["role"]
-                            content = message["content"]
-
-                            if isinstance(content, list):
-                                content_str = ", ".join(
-                                    # pylint: disable=line-too-long
-                                    f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                    if "type" in item else f'text: {item["text"]}'
-                                    for item in content
-                                )
-                                formatted_messages.append(f"{role}: {content_str}")
-                            else:
-                                formatted_messages.append(f"{role}: {content}")
-                        prompt = "\n".join(formatted_messages)
-
-                        # Calculate tokens using input prompt and aggregated response
-                        prompt_tokens = openai_tokens(prompt,
-                                                      kwargs.get("model", "gpt-3.5-turbo"))
-                        completion_tokens = openai_tokens(llmresponse,
-                                                          kwargs.get("model", "gpt-3.5-turbo"))
-
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                                   pricing_info, prompt_tokens,
-                                                   completion_tokens)
-
-                        # Set Span attributes
-                        span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                        span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                           SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                        span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                           SemanticConvetion.GEN_AI_TYPE_CHAT)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                           gen_ai_endpoint)
-                        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                           response_id)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                           environment)
-                        span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                           application_name)
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                           kwargs.get("model", "gpt-3.5-turbo"))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
-                                           kwargs.get("user", ""))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                           kwargs.get("top_p", 1.0))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                           kwargs.get("max_tokens", -1))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                           kwargs.get("temperature", 1.0))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                           kwargs.get("presence_penalty", 0.0))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                           kwargs.get("frequency_penalty", 0.0))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                           kwargs.get("seed", ""))
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                           True)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                           prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                           completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                           prompt_tokens + completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                           cost)
-                        if trace_content:
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                                },
-                            )
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                                },
-                            )
-
-                        span.set_status(Status(StatusCode.OK))
-
-                        if disable_metrics is False:
-                            attributes = {
-                                TELEMETRY_SDK_NAME:
-                                    "openlit",
-                                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                    application_name,
-                                SemanticConvetion.GEN_AI_SYSTEM:
-                                    SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                    environment,
-                                SemanticConvetion.GEN_AI_TYPE:
-                                    SemanticConvetion.GEN_AI_TYPE_CHAT,
-                                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                    kwargs.get("model", "gpt-3.5-turbo")
-                            }
-
-                            metrics["genai_requests"].add(1, attributes)
-                            metrics["genai_total_tokens"].add(
-                                prompt_tokens + completion_tokens, attributes
-                            )
-                            metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                            metrics["genai_cost"].record(cost, attributes)
+            awaited_wrapped = wrapped(*args, **kwargs)
+            span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
 
-
-                        handle_exception(span, e)
-                        logger.error("Error in trace creation: %s", e)
-
-            return stream_generator()
+            return TracedSyncStream(awaited_wrapped, span, kwargs)
 
         # Handling for non-streaming responses
         else:
```
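The hunk above is the heart of the change: the old `stream_generator()` closure, which held the span open inside a `with` block while yielding chunks, is replaced by the `TracedSyncStream` proxy, which records telemetry only once the underlying iterator is exhausted. A minimal sketch of the same proxy-iterator technique, using hypothetical names rather than OpenLIT's real client types:

```python
# Minimal sketch of the proxy-iterator technique behind TracedSyncStream,
# with hypothetical names; it forwards chunks untouched, aggregates their
# text, and fires a completion callback exactly once, on StopIteration.
class InstrumentedStream:
    def __init__(self, wrapped, on_complete):
        self.__wrapped__ = wrapped        # the real chunk iterator
        self._aggregated = ""             # accumulated response text
        self._on_complete = on_complete   # telemetry finalizer

    def __iter__(self):
        return self

    def __getattr__(self, name):
        # Keep the wrapper transparent for any other attribute access
        return getattr(self.__wrapped__, name)

    def __next__(self):
        try:
            chunk = next(self.__wrapped__)
        except StopIteration:
            self._on_complete(self._aggregated)  # e.g. end span, emit metrics
            raise                                # preserve iterator protocol
        self._aggregated += chunk
        return chunk

# The consumer sees the original chunks; telemetry happens as a side effect.
stream = InstrumentedStream(iter(["Hel", "lo"]), lambda text: print(f"traced: {text}"))
assert list(stream) == ["Hel", "lo"]
```

Because the span is now started with `tracer.start_span()` rather than a context manager, it stays open across the caller's entire iteration and is ended in the wrapper's `finally` block.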
```diff
@@ -196,6 +246,8 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
             with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
                 response = wrapped(*args, **kwargs)
 
+                response_dict = response_as_dict(response)
+
                 try:
                     # Format 'messages' into a single string
                     message_prompt = kwargs.get("messages", "")
@@ -225,7 +277,7 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                     span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
                                        gen_ai_endpoint)
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-
+                                       response_dict.get("id"))
                     span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
                                        environment)
                     span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
@@ -260,17 +312,17 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                     if "tools" not in kwargs:
                         # Calculate cost of the operation
                         cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                                   pricing_info,
-
+                                                   pricing_info, response_dict.get('usage', {}).get('prompt_tokens', None),
+                                                   response_dict.get('usage', {}).get('completion_tokens', None))
 
                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-
+                                           response_dict.get('usage', {}).get('prompt_tokens', None))
                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-
+                                           response_dict.get('usage', {}).get('completion_tokens', None))
                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-
+                                           response_dict.get('usage', {}).get('total_tokens', None))
                         span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-
+                                           [response_dict.get('choices', [])[0].get('finish_reason', None)])
                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                            cost)
 
@@ -280,7 +332,7 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                             span.add_event(
                                 name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                                 attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION:
+                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices', [])[0].get("message").get("content"),
                                 },
                             )
 
@@ -292,7 +344,7 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                                 span.add_event(
                                     name=attribute_name,
                                     attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION:
+                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
                                     },
                                 )
                                 i += 1
@@ -304,8 +356,8 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                     elif "tools" in kwargs:
                         # Calculate cost of the operation
                         cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                                   pricing_info,
-
+                                                   pricing_info, response_dict.get('usage').get('prompt_tokens'),
+                                                   response_dict.get('usage').get('completion_tokens'))
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                             attributes={
@@ -313,11 +365,11 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                             },
                         )
                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-
+                                           response_dict.get('usage').get('prompt_tokens'))
                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-
+                                           response_dict.get('usage').get('completion_tokens'))
                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-
+                                           response_dict.get('usage').get('total_tokens'))
                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                            cost)
 
@@ -340,9 +392,9 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                         }
 
                         metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(
-                        metrics["genai_completion_tokens"].add(
-                        metrics["genai_prompt_tokens"].add(
+                        metrics["genai_total_tokens"].add(response_dict.get('usage').get('total_tokens'), attributes)
+                        metrics["genai_completion_tokens"].add(response_dict.get('usage').get('completion_tokens'), attributes)
+                        metrics["genai_prompt_tokens"].add(response_dict.get('usage').get('prompt_tokens'), attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
             # Return original response
@@ -416,8 +468,8 @@ def embedding(gen_ai_endpoint, version, environment, application_name,
                                    kwargs.get("model", "text-embedding-ada-002"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
                                    kwargs.get("encoding_format", "float"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
-
+                # span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
+                #                    kwargs.get("dimensions", "null"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                    kwargs.get("user", ""))
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
```
openlit/otel/metrics.py
CHANGED
```diff
@@ -1,4 +1,4 @@
-# pylint: disable=duplicate-code, line-too-long
+# pylint: disable=duplicate-code, line-too-long, ungrouped-imports
 """
 Setups up OpenTelemetry Meter
 """
@@ -8,10 +8,13 @@ from opentelemetry.sdk.metrics import MeterProvider
 from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader, ConsoleMetricExporter
 from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from opentelemetry.sdk.resources import Resource
-from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
-
 from openlit.semcov import SemanticConvetion
 
+if os.environ.get("OTEL_EXPORTER_OTLP_PROTOCOL") == "grpc":
+    from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
+else:
+    from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
+
 # Global flag to check if the meter provider initialization is complete.
 METER_SET = False
 
```
openlit/otel/tracing.py
CHANGED
```diff
@@ -1,4 +1,4 @@
-# pylint: disable=duplicate-code, line-too-long
+# pylint: disable=duplicate-code, line-too-long, ungrouped-imports
 """
 Setups up OpenTelemetry tracer
 """
@@ -10,8 +10,11 @@ from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export import BatchSpanProcessor, SimpleSpanProcessor
 from opentelemetry.sdk.trace.export import ConsoleSpanExporter
-from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
 
+if os.environ.get("OTEL_EXPORTER_OTLP_PROTOCOL") == "grpc":
+    from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
+else:
+    from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
 
 # Global flag to check if the tracer provider initialization is complete.
 TRACER_SET = False
```
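The metrics and tracing changes follow the same pattern: the gRPC exporter is imported only when `OTEL_EXPORTER_OTLP_PROTOCOL` is set to `grpc`, and HTTP/protobuf remains the default. Since the check runs at import time, the variable must be set before `openlit` is imported. A sketch of opting in, assuming a local collector listening on the conventional gRPC port:

```python
import os

# Select the gRPC exporter; any other value keeps the HTTP default.
# This must happen before `import openlit`, because the import path is
# decided when openlit.otel.metrics / openlit.otel.tracing are loaded.
os.environ["OTEL_EXPORTER_OTLP_PROTOCOL"] = "grpc"
os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "http://127.0.0.1:4317"  # example endpoint

import openlit

openlit.init(application_name="demo-app", environment="development")
```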
openlit/semcov/__init__.py
CHANGED
```diff
@@ -88,6 +88,7 @@ class SemanticConvetion:
     GEN_AI_TYPE_FINETUNING = "fine_tuning"
     GEN_AI_TYPE_VECTORDB = "vectordb"
     GEN_AI_TYPE_FRAMEWORK = "framework"
+    GEN_AI_TYPE_AGENT = "agent"
 
     GEN_AI_SYSTEM_HUGGING_FACE = "huggingface"
     GEN_AI_SYSTEM_OPENAI = "openai"
@@ -108,6 +109,8 @@ class SemanticConvetion:
     GEN_AI_SYSTEM_LLAMAINDEX = "llama_index"
     GEN_AI_SYSTEM_HAYSTACK = "haystack"
     GEN_AI_SYSTEM_EMBEDCHAIN = "embedchain"
+    GEN_AI_SYSTEM_LITELLM = "litellm"
+    GEN_AI_SYSTEM_CREWAI = "crewai"
 
     # Vector DB
     DB_REQUESTS = "db.total.requests"
@@ -154,6 +157,24 @@ class SemanticConvetion:
     DB_SYSTEM_QDRANT = "qdrant"
     DB_SYSTEM_MILVUS = "milvus"
 
+    # Agents
+    GEN_AI_AGENT_ID = "gen_ai.agent.id"
+    GEN_AI_AGENT_TASK_ID = "gen_ai.agent.task.id"
+    GEN_AI_AGENT_ROLE = "gen_ai.agent.role"
+    GEN_AI_AGENT_GOAL = "gen_ai.agent.goal"
+    GEN_AI_AGENT_CONTEXT = "gen_ai.agent.context"
+    GEN_AI_AGENT_ENABLE_CACHE = "gen_ai.agent.enable_cache"
+    GEN_AI_AGENT_ALLOW_DELEGATION = "gen_ai.agent.allow_delegation"
+    GEN_AI_AGENT_ALLOW_CODE_EXECUTION = "gen_ai.agent.allow_code_execution"
+    GEN_AI_AGENT_MAX_RETRY_LIMIT = "gen_ai.agent.max_retry_limit"
+    GEN_AI_AGENT_TOOLS = "gen_ai.agent.tools"
+    GEN_AI_AGENT_TOOL_RESULTS = "gen_ai.agent.tool_results"
+    GEN_AI_AGENT_TASK = "gen_ai.agent.task"
+    GEN_AI_AGENT_EXPECTED_OUTPUT = "gen_ai.agent.expected_output"
+    GEN_AI_AGENT_ACTUAL_OUTPUT = "gen_ai.agent.actual_output"
+    GEN_AI_AGENT_HUMAN_INPUT = "gen_ai.agent.human_input"
+    GEN_AI_AGENT_TASK_ASSOCIATION = "gen_ai.agent.task_associations"
+
     # GPU
     GPU_INDEX = "gpu.index"
     GPU_UUID = "gpu.uuid"
```
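These constants back the new CrewAI instrumentation added in this release (`openlit/instrumentation/crewai/`, +149 lines). As a hedged illustration of how an instrumentation might apply them, with made-up agent values rather than the wrapper's real extraction logic:

```python
from opentelemetry import trace
from openlit.semcov import SemanticConvetion

tracer = trace.get_tracer(__name__)

# Illustrative values only; the real crewai.py wrapper reads these fields
# from the instrumented CrewAI Agent/Task objects.
with tracer.start_as_current_span("crewai.agent") as span:
    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                       SemanticConvetion.GEN_AI_SYSTEM_CREWAI)
    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
                       SemanticConvetion.GEN_AI_TYPE_AGENT)
    span.set_attribute(SemanticConvetion.GEN_AI_AGENT_ROLE, "researcher")
    span.set_attribute(SemanticConvetion.GEN_AI_AGENT_GOAL, "summarize recent findings")
    span.set_attribute(SemanticConvetion.GEN_AI_AGENT_ALLOW_DELEGATION, False)
```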
{openlit-1.29.1.dist-info → openlit-1.30.2.dist-info}/METADATA
CHANGED

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: openlit
-Version: 1.29.1
+Version: 1.30.2
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
 Home-page: https://github.com/openlit/openlit/tree/main/openlit/python
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu
@@ -63,16 +63,16 @@ This project proudly follows and maintains the [Semantic Conventions](https://gi
 
 ## Auto Instrumentation Capabilities
 
-| LLMs | Vector DBs | Frameworks
-
-| [✅ OpenAI](https://docs.openlit.io/latest/integrations/openai) | [✅ ChromaDB](https://docs.openlit.io/latest/integrations/chromadb) | [✅ Langchain](https://docs.openlit.io/latest/integrations/langchain)
-| [✅ Ollama](https://docs.openlit.io/latest/integrations/ollama) | [✅ Pinecone](https://docs.openlit.io/latest/integrations/pinecone) | [✅ LiteLLM](https://docs.openlit.io/latest/integrations/litellm)
-| [✅ Anthropic](https://docs.openlit.io/latest/integrations/anthropic) | [✅ Qdrant](https://docs.openlit.io/latest/integrations/qdrant) | [✅ LlamaIndex](https://docs.openlit.io/latest/integrations/llama-index)
-| [✅ GPT4All](https://docs.openlit.io/latest/integrations/gpt4all) | [✅ Milvus](https://docs.openlit.io/latest/integrations/milvus) | [✅ Haystack](https://docs.openlit.io/latest/integrations/haystack)
-| [✅ Cohere](https://docs.openlit.io/latest/integrations/cohere) | | [✅ EmbedChain](https://docs.openlit.io/latest/integrations/embedchain)
-| [✅ Mistral](https://docs.openlit.io/latest/integrations/mistral) | | [✅ Guardrails](https://docs.openlit.io/latest/integrations/guardrails)
-| [✅ Azure OpenAI](https://docs.openlit.io/latest/integrations/azure-openai) | |
-| [✅ Azure AI Inference](https://docs.openlit.io/latest/integrations/azure-ai-inference) | |
+| LLMs | Vector DBs | Frameworks | GPUs |
+|--------------------------------------------------------------------------|----------------------------------------------|-------------------------------------------------|---------------|
+| [✅ OpenAI](https://docs.openlit.io/latest/integrations/openai) | [✅ ChromaDB](https://docs.openlit.io/latest/integrations/chromadb) | [✅ Langchain](https://docs.openlit.io/latest/integrations/langchain) | [✅ NVIDIA](https://docs.openlit.io/latest/integrations/nvidia-gpu) |
+| [✅ Ollama](https://docs.openlit.io/latest/integrations/ollama) | [✅ Pinecone](https://docs.openlit.io/latest/integrations/pinecone) | [✅ LiteLLM](https://docs.openlit.io/latest/integrations/litellm) | [✅ AMD](#) |
+| [✅ Anthropic](https://docs.openlit.io/latest/integrations/anthropic) | [✅ Qdrant](https://docs.openlit.io/latest/integrations/qdrant) | [✅ LlamaIndex](https://docs.openlit.io/latest/integrations/llama-index) | |
+| [✅ GPT4All](https://docs.openlit.io/latest/integrations/gpt4all) | [✅ Milvus](https://docs.openlit.io/latest/integrations/milvus) | [✅ Haystack](https://docs.openlit.io/latest/integrations/haystack) | |
+| [✅ Cohere](https://docs.openlit.io/latest/integrations/cohere) | | [✅ EmbedChain](https://docs.openlit.io/latest/integrations/embedchain) | |
+| [✅ Mistral](https://docs.openlit.io/latest/integrations/mistral) | | [✅ Guardrails](https://docs.openlit.io/latest/integrations/guardrails) | |
+| [✅ Azure OpenAI](https://docs.openlit.io/latest/integrations/azure-openai) | | [✅ CrewAI](https://docs.openlit.io/latest/integrations/crewai) | |
+| [✅ Azure AI Inference](https://docs.openlit.io/latest/integrations/azure-ai-inference) | |
 | [✅ GitHub AI Models](https://docs.openlit.io/latest/integrations/github-models) | | | |
 | [✅ HuggingFace Transformers](https://docs.openlit.io/latest/integrations/huggingface) | | | |
 | [✅ Amazon Bedrock](https://docs.openlit.io/latest/integrations/bedrock) | | | |
```