openlit 1.34.29__py3-none-any.whl → 1.34.31__py3-none-any.whl
This diff represents the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +235 -86
- openlit/__init__.py +16 -13
- openlit/_instrumentors.py +2 -1
- openlit/evals/all.py +50 -21
- openlit/evals/bias_detection.py +47 -20
- openlit/evals/hallucination.py +53 -22
- openlit/evals/toxicity.py +50 -21
- openlit/evals/utils.py +54 -30
- openlit/guard/all.py +61 -19
- openlit/guard/prompt_injection.py +34 -14
- openlit/guard/restrict_topic.py +46 -15
- openlit/guard/sensitive_topic.py +34 -14
- openlit/guard/utils.py +58 -22
- openlit/instrumentation/ag2/__init__.py +24 -8
- openlit/instrumentation/ag2/ag2.py +34 -13
- openlit/instrumentation/ag2/async_ag2.py +34 -13
- openlit/instrumentation/ag2/utils.py +133 -30
- openlit/instrumentation/ai21/__init__.py +43 -14
- openlit/instrumentation/ai21/ai21.py +47 -21
- openlit/instrumentation/ai21/async_ai21.py +47 -21
- openlit/instrumentation/ai21/utils.py +299 -78
- openlit/instrumentation/anthropic/__init__.py +21 -4
- openlit/instrumentation/anthropic/anthropic.py +28 -17
- openlit/instrumentation/anthropic/async_anthropic.py +28 -17
- openlit/instrumentation/anthropic/utils.py +145 -35
- openlit/instrumentation/assemblyai/__init__.py +11 -2
- openlit/instrumentation/assemblyai/assemblyai.py +15 -4
- openlit/instrumentation/assemblyai/utils.py +120 -25
- openlit/instrumentation/astra/__init__.py +43 -10
- openlit/instrumentation/astra/astra.py +28 -5
- openlit/instrumentation/astra/async_astra.py +28 -5
- openlit/instrumentation/astra/utils.py +151 -55
- openlit/instrumentation/azure_ai_inference/__init__.py +43 -10
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +53 -21
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +53 -21
- openlit/instrumentation/azure_ai_inference/utils.py +307 -83
- openlit/instrumentation/bedrock/__init__.py +21 -4
- openlit/instrumentation/bedrock/bedrock.py +63 -25
- openlit/instrumentation/bedrock/utils.py +139 -30
- openlit/instrumentation/chroma/__init__.py +89 -16
- openlit/instrumentation/chroma/chroma.py +28 -6
- openlit/instrumentation/chroma/utils.py +167 -51
- openlit/instrumentation/cohere/__init__.py +63 -18
- openlit/instrumentation/cohere/async_cohere.py +63 -24
- openlit/instrumentation/cohere/cohere.py +63 -24
- openlit/instrumentation/cohere/utils.py +286 -73
- openlit/instrumentation/controlflow/__init__.py +35 -9
- openlit/instrumentation/controlflow/controlflow.py +66 -33
- openlit/instrumentation/crawl4ai/__init__.py +25 -10
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +78 -31
- openlit/instrumentation/crawl4ai/crawl4ai.py +78 -31
- openlit/instrumentation/crewai/__init__.py +111 -24
- openlit/instrumentation/crewai/async_crewai.py +114 -0
- openlit/instrumentation/crewai/crewai.py +104 -131
- openlit/instrumentation/crewai/utils.py +615 -0
- openlit/instrumentation/dynamiq/__init__.py +46 -12
- openlit/instrumentation/dynamiq/dynamiq.py +74 -33
- openlit/instrumentation/elevenlabs/__init__.py +23 -4
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +16 -4
- openlit/instrumentation/elevenlabs/elevenlabs.py +16 -4
- openlit/instrumentation/elevenlabs/utils.py +128 -25
- openlit/instrumentation/embedchain/__init__.py +11 -2
- openlit/instrumentation/embedchain/embedchain.py +68 -35
- openlit/instrumentation/firecrawl/__init__.py +24 -7
- openlit/instrumentation/firecrawl/firecrawl.py +46 -20
- openlit/instrumentation/google_ai_studio/__init__.py +45 -10
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +67 -44
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +67 -44
- openlit/instrumentation/google_ai_studio/utils.py +180 -67
- openlit/instrumentation/gpt4all/__init__.py +22 -7
- openlit/instrumentation/gpt4all/gpt4all.py +67 -29
- openlit/instrumentation/gpt4all/utils.py +285 -61
- openlit/instrumentation/gpu/__init__.py +128 -47
- openlit/instrumentation/groq/__init__.py +21 -4
- openlit/instrumentation/groq/async_groq.py +33 -21
- openlit/instrumentation/groq/groq.py +33 -21
- openlit/instrumentation/groq/utils.py +192 -55
- openlit/instrumentation/haystack/__init__.py +70 -24
- openlit/instrumentation/haystack/async_haystack.py +28 -6
- openlit/instrumentation/haystack/haystack.py +28 -6
- openlit/instrumentation/haystack/utils.py +196 -74
- openlit/instrumentation/julep/__init__.py +69 -19
- openlit/instrumentation/julep/async_julep.py +53 -27
- openlit/instrumentation/julep/julep.py +53 -28
- openlit/instrumentation/langchain/__init__.py +74 -63
- openlit/instrumentation/langchain/callback_handler.py +1100 -0
- openlit/instrumentation/langchain_community/__init__.py +13 -2
- openlit/instrumentation/langchain_community/async_langchain_community.py +23 -5
- openlit/instrumentation/langchain_community/langchain_community.py +23 -5
- openlit/instrumentation/langchain_community/utils.py +35 -9
- openlit/instrumentation/letta/__init__.py +68 -15
- openlit/instrumentation/letta/letta.py +99 -54
- openlit/instrumentation/litellm/__init__.py +43 -14
- openlit/instrumentation/litellm/async_litellm.py +51 -26
- openlit/instrumentation/litellm/litellm.py +51 -26
- openlit/instrumentation/litellm/utils.py +312 -101
- openlit/instrumentation/llamaindex/__init__.py +267 -90
- openlit/instrumentation/llamaindex/async_llamaindex.py +28 -6
- openlit/instrumentation/llamaindex/llamaindex.py +28 -6
- openlit/instrumentation/llamaindex/utils.py +204 -91
- openlit/instrumentation/mem0/__init__.py +11 -2
- openlit/instrumentation/mem0/mem0.py +50 -29
- openlit/instrumentation/milvus/__init__.py +10 -2
- openlit/instrumentation/milvus/milvus.py +31 -6
- openlit/instrumentation/milvus/utils.py +166 -67
- openlit/instrumentation/mistral/__init__.py +63 -18
- openlit/instrumentation/mistral/async_mistral.py +63 -24
- openlit/instrumentation/mistral/mistral.py +63 -24
- openlit/instrumentation/mistral/utils.py +277 -69
- openlit/instrumentation/multion/__init__.py +69 -19
- openlit/instrumentation/multion/async_multion.py +57 -26
- openlit/instrumentation/multion/multion.py +57 -26
- openlit/instrumentation/ollama/__init__.py +39 -18
- openlit/instrumentation/ollama/async_ollama.py +57 -26
- openlit/instrumentation/ollama/ollama.py +57 -26
- openlit/instrumentation/ollama/utils.py +226 -50
- openlit/instrumentation/openai/__init__.py +156 -32
- openlit/instrumentation/openai/async_openai.py +147 -67
- openlit/instrumentation/openai/openai.py +150 -67
- openlit/instrumentation/openai/utils.py +660 -186
- openlit/instrumentation/openai_agents/__init__.py +6 -2
- openlit/instrumentation/openai_agents/processor.py +409 -537
- openlit/instrumentation/phidata/__init__.py +13 -5
- openlit/instrumentation/phidata/phidata.py +67 -32
- openlit/instrumentation/pinecone/__init__.py +48 -9
- openlit/instrumentation/pinecone/async_pinecone.py +27 -5
- openlit/instrumentation/pinecone/pinecone.py +27 -5
- openlit/instrumentation/pinecone/utils.py +153 -47
- openlit/instrumentation/premai/__init__.py +22 -7
- openlit/instrumentation/premai/premai.py +51 -26
- openlit/instrumentation/premai/utils.py +246 -59
- openlit/instrumentation/pydantic_ai/__init__.py +49 -22
- openlit/instrumentation/pydantic_ai/pydantic_ai.py +69 -16
- openlit/instrumentation/pydantic_ai/utils.py +89 -24
- openlit/instrumentation/qdrant/__init__.py +19 -4
- openlit/instrumentation/qdrant/async_qdrant.py +33 -7
- openlit/instrumentation/qdrant/qdrant.py +33 -7
- openlit/instrumentation/qdrant/utils.py +228 -93
- openlit/instrumentation/reka/__init__.py +23 -10
- openlit/instrumentation/reka/async_reka.py +17 -11
- openlit/instrumentation/reka/reka.py +17 -11
- openlit/instrumentation/reka/utils.py +138 -36
- openlit/instrumentation/together/__init__.py +44 -12
- openlit/instrumentation/together/async_together.py +50 -27
- openlit/instrumentation/together/together.py +50 -27
- openlit/instrumentation/together/utils.py +301 -71
- openlit/instrumentation/transformers/__init__.py +2 -1
- openlit/instrumentation/transformers/transformers.py +13 -3
- openlit/instrumentation/transformers/utils.py +139 -36
- openlit/instrumentation/vertexai/__init__.py +81 -16
- openlit/instrumentation/vertexai/async_vertexai.py +33 -15
- openlit/instrumentation/vertexai/utils.py +123 -27
- openlit/instrumentation/vertexai/vertexai.py +33 -15
- openlit/instrumentation/vllm/__init__.py +12 -5
- openlit/instrumentation/vllm/utils.py +121 -31
- openlit/instrumentation/vllm/vllm.py +16 -10
- openlit/otel/events.py +35 -10
- openlit/otel/metrics.py +32 -24
- openlit/otel/tracing.py +24 -9
- openlit/semcov/__init__.py +101 -7
- {openlit-1.34.29.dist-info → openlit-1.34.31.dist-info}/METADATA +2 -1
- openlit-1.34.31.dist-info/RECORD +166 -0
- openlit/instrumentation/langchain/async_langchain.py +0 -102
- openlit/instrumentation/langchain/langchain.py +0 -102
- openlit/instrumentation/langchain/utils.py +0 -252
- openlit-1.34.29.dist-info/RECORD +0 -166
- {openlit-1.34.29.dist-info → openlit-1.34.31.dist-info}/LICENSE +0 -0
- {openlit-1.34.29.dist-info → openlit-1.34.31.dist-info}/WHEEL +0 -0
openlit/instrumentation/litellm/utils.py +312 -101 (removed lines truncated in the source view are marked with …)

@@ -1,6 +1,7 @@
 """
 LiteLLM OpenTelemetry instrumentation utility functions
 """
+
 import time
 
 from opentelemetry.trace import Status, StatusCode
@@ -17,6 +18,7 @@ from openlit.__helpers import (
 )
 from openlit.semcov import SemanticConvention
 
+
 def format_content(messages):
     """
     Process a list of messages to extract content.
@@ -24,20 +26,22 @@ def format_content(messages):
 
     formatted_messages = []
     for message in messages:
-        role = message[…
-        content = message[…
+        role = message["role"]
+        content = message["content"]
 
         if isinstance(content, list):
            content_str = ", ".join(
-                f…
-                if "type" in item…
+                f"{item['type']}: {item['text'] if 'text' in item else item['image_url']}"
+                if "type" in item
+                else f"text: {item['text']}"
                 for item in content
            )
-            formatted_messages.append(f…
+            formatted_messages.append(f"{role}: {content_str}")
        else:
-            formatted_messages.append(f…
+            formatted_messages.append(f"{role}: {content}")
+
+    return "\n".join(formatted_messages)
 
-    return '\n'.join(formatted_messages)
 
 def process_chunk(scope, chunk):
     """
@@ -55,45 +59,63 @@ def process_chunk(scope, chunk):
     chunked = response_as_dict(chunk)
 
     # Collect message IDs and aggregated response from events
-    if …
-    …
-    …
-    …
+    if len(chunked.get("choices", [])) > 0 and (
+        "delta" in chunked.get("choices")[0]
+        and "content" in chunked.get("choices")[0].get("delta", {})
+    ):
+        content = chunked.get("choices")[0].get("delta").get("content")
         if content:
             scope._llmresponse += content
 
     # Handle tool calls in streaming - optimized
-    delta_tools = chunked.get(…
+    delta_tools = chunked.get("choices", [{}])[0].get("delta", {}).get("tool_calls")
     if delta_tools:
         scope._tools = scope._tools or []
 
         for tool in delta_tools:
-            idx = tool.get(…
+            idx = tool.get("index", 0)
 
             # Extend list if needed
             scope._tools.extend([{}] * (idx + 1 - len(scope._tools)))
 
-            if tool.get(…
-                func = tool.get(…
+            if tool.get("id"):  # New tool (id exists)
+                func = tool.get("function", {})
                 scope._tools[idx] = {
-    …
-    …
-    …
+                    "id": tool["id"],
+                    "function": {
+                        "name": func.get("name", ""),
+                        "arguments": func.get("arguments", ""),
+                    },
+                    "type": tool.get("type", "function"),
                 }
-            elif …
-                scope._tools[idx]…
-    …
-    …
-    …
-    …
-    …
-    …
-                scope.…
-                scope.…
+            elif (
+                scope._tools[idx] and "function" in tool
+            ):  # Append args (id is None)
+                scope._tools[idx]["function"]["arguments"] += tool["function"].get(
+                    "arguments", ""
+                )
+
+    if chunked.get("usage"):
+        scope._input_tokens = chunked.get("usage").get("prompt_tokens", 0)
+        scope._output_tokens = chunked.get("usage").get("completion_tokens", 0)
+        scope._response_id = chunked.get("id")
+        scope._response_model = chunked.get("model")
+        scope._finish_reason = chunked.get("choices", [{}])[0].get("finish_reason")
+        scope._response_service_tier = str(chunked.get("system_fingerprint", ""))
     scope._end_time = time.time()
 
-    …
-    …
+
+def common_chat_logic(
+    scope,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content,
+    disable_metrics,
+    version,
+    is_stream,
+):
     """
     Process chat request and generate Telemetry
     """
@@ -101,60 +123,135 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     if len(scope._timestamps) > 1:
         scope._tbt = calculate_tbt(scope._timestamps)
 
-    prompt = format_content(scope._kwargs.get(…
-    request_model = scope._kwargs.get(…
+    prompt = format_content(scope._kwargs.get("messages", []))
+    request_model = scope._kwargs.get("model", "openai/gpt-4o")
 
-    cost = get_chat_model_cost(…
+    cost = get_chat_model_cost(
+        request_model, pricing_info, scope._input_tokens, scope._output_tokens
+    )
 
     # Common Span Attributes
-    common_span_attributes(…
-    …
-    …
-    …
+    common_span_attributes(
+        scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+        SemanticConvention.GEN_AI_SYSTEM_LITELLM,
+        scope._server_address,
+        scope._server_port,
+        request_model,
+        scope._response_model,
+        environment,
+        application_name,
+        is_stream,
+        scope._tbt,
+        scope._ttft,
+        version,
+    )
+
+    # Helper function to handle None values with proper defaults
+    def safe_get(value, default):
+        return default if value is None else value
 
     # Span Attributes for Request parameters
-    scope._span.set_attribute(…
-    …
-    …
-    scope._span.set_attribute(…
-    …
-    …
-    …
-    scope._span.set_attribute(…
-    …
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_SEED, safe_get(scope._kwargs.get("seed"), "")
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+        safe_get(scope._kwargs.get("frequency_penalty"), 0.0),
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
+        safe_get(scope._kwargs.get("max_tokens"), -1),
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
+        safe_get(scope._kwargs.get("presence_penalty"), 0.0),
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, scope._kwargs.get("stop", [])
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
+        safe_get(scope._kwargs.get("temperature"), 1.0),
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_TOP_P,
+        safe_get(scope._kwargs.get("top_p"), 1.0),
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_USER, safe_get(scope._kwargs.get("user"), "")
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_SERVICE_TIER,
+        safe_get(scope._kwargs.get("service_tier"), "auto"),
+    )
 
     # Span Attributes for Response parameters
     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
-    scope._span.set_attribute(…
-    …
-    …
-    scope._span.set_attribute(…
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason]
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_RESPONSE_SERVICE_TIER, scope._response_service_tier
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
+        scope._response_service_tier,
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_OUTPUT_TYPE,
+        "text" if isinstance(scope._llmresponse, str) else "json",
+    )
 
     # Span Attributes for Cost and Tokens
-    scope._span.set_attribute(…
-    …
-    …
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
+        scope._input_tokens + scope._output_tokens,
+    )
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
 
     # Span Attributes for Tools - optimized
     if scope._tools:
         tools = scope._tools if isinstance(scope._tools, list) else [scope._tools]
 
-        names, ids, args = …
-            (…
-    …
-    …
-    …
-    …
+        names, ids, args = (
+            zip(
+                *[
+                    (
+                        t.get("function", {}).get("name", ""),
+                        str(t.get("id", "")),
+                        str(t.get("function", {}).get("arguments", "")),
+                    )
+                    for t in tools
+                    if isinstance(t, dict) and t
+                ]
+            )
+            if tools
+            else ([], [], [])
+        )
 
-        scope._span.set_attribute(…
-    …
-    …
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_NAME, ", ".join(filter(None, names))
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_CALL_ID, ", ".join(filter(None, ids))
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_ARGS, ", ".join(filter(None, args))
+        )
 
     # Span Attributes for Content
     if capture_message_content:
         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
-        scope._span.set_attribute(…
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse
+        )
 
        # To be removed once the change to span_attributes (from span events) is complete
        scope._span.add_event(
@@ -174,23 +271,69 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
 
     # Metrics
     if not disable_metrics:
-        record_completion_metrics(…
-    …
-    …
-    …
+        record_completion_metrics(
+            metrics,
+            SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            SemanticConvention.GEN_AI_SYSTEM_LITELLM,
+            scope._server_address,
+            scope._server_port,
+            request_model,
+            scope._response_model,
+            environment,
+            application_name,
+            scope._start_time,
+            scope._end_time,
+            scope._input_tokens,
+            scope._output_tokens,
+            cost,
+            scope._tbt,
+            scope._ttft,
+        )
+
 
-def process_streaming_chat_response(…
-    …
+def process_streaming_chat_response(
+    scope,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="",
+):
     """
     Process streaming chat request and generate Telemetry
     """
 
-    common_chat_logic(…
-    …
+    common_chat_logic(
+        scope,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+        is_stream=True,
+    )
+
 
-def process_chat_response(…
-    …
-    …
+def process_chat_response(
+    response,
+    request_model,
+    pricing_info,
+    server_port,
+    server_address,
+    environment,
+    application_name,
+    metrics,
+    start_time,
+    span,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="1.0.0",
+    **kwargs,
+):
     """
     Process chat request and generate Telemetry
     """
@@ -206,12 +349,14 @@ def process_chat_response(response, request_model, pricing_info, server_port, se
         (choice.get("message", {}).get("content") or "")
         for choice in response_dict.get("choices", [])
     )
-    scope._input_tokens = response_dict.get(…
-    scope._output_tokens = response_dict.get(…
-    scope._response_id = response_dict.get(…
-    scope._response_model = response_dict.get(…
-    scope._finish_reason = str(…
-    …
+    scope._input_tokens = response_dict.get("usage", {}).get("prompt_tokens", 0)
+    scope._output_tokens = response_dict.get("usage", {}).get("completion_tokens", 0)
+    scope._response_id = response_dict.get("id")
+    scope._response_model = response_dict.get("model")
+    scope._finish_reason = str(
+        response_dict.get("choices", [])[0].get("finish_reason", "")
+    )
+    scope._response_service_tier = str(response_dict.get("system_fingerprint", ""))
     scope._timestamps = []
     scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
     scope._server_address, scope._server_port = server_address, server_port
@@ -219,18 +364,43 @@
 
     # Handle tool calls
     if scope._kwargs.get("tools"):
-        scope._tools = …
+        scope._tools = (
+            response_dict.get("choices", [{}])[0].get("message", {}).get("tool_calls")
+        )
     else:
         scope._tools = None
 
-    common_chat_logic(…
-    …
+    common_chat_logic(
+        scope,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+        is_stream=False,
+    )
 
     return response
 
-    …
-    …
-    …
+
+def process_embedding_response(
+    response,
+    request_model,
+    pricing_info,
+    server_port,
+    server_address,
+    environment,
+    application_name,
+    metrics,
+    start_time,
+    span,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="1.0.0",
+    **kwargs,
+):
     """
     Process embedding request and generate Telemetry
     """
@@ -242,8 +412,8 @@ def process_embedding_response(response, request_model, pricing_info, server_por
     scope._start_time = start_time
     scope._end_time = time.time()
     scope._span = span
-    scope._input_tokens = response_dict.get(…
-    scope._response_model = response_dict.get(…
+    scope._input_tokens = response_dict.get("usage", {}).get("prompt_tokens", 0)
+    scope._response_model = response_dict.get("model")
     scope._server_address, scope._server_port = server_address, server_port
     scope._kwargs = kwargs
 
@@ -251,29 +421,58 @@ def process_embedding_response(response, request_model, pricing_info, server_por
     cost = get_embed_model_cost(request_model, pricing_info, scope._input_tokens)
 
     # Common Span Attributes
-    common_span_attributes(…
-    …
-    …
-    …
+    common_span_attributes(
+        scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
+        SemanticConvention.GEN_AI_SYSTEM_LITELLM,
+        scope._server_address,
+        scope._server_port,
+        request_model,
+        scope._response_model,
+        environment,
+        application_name,
+        False,
+        0,
+        scope._end_time - scope._start_time,
+        version,
+    )
+
+    # Helper function to handle None values with proper defaults
+    def safe_get(value, default):
+        return default if value is None else value
 
     # Span Attributes for Request parameters
-    scope._span.set_attribute(…
-    …
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_ENCODING_FORMATS,
+        [scope._kwargs.get("encoding_format", "float")],
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_USER, safe_get(scope._kwargs.get("user"), "")
+    )
 
     # Span Attributes for Cost and Tokens
-    scope._span.set_attribute(…
-    …
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens
+    )
    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
 
     # Span Attributes for Content
     if capture_message_content:
-        scope._span.set_attribute(…
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_CONTENT_PROMPT,
+            str(scope._kwargs.get("input", "")),
+        )
 
         # To be removed once the change to span_attributes (from span events) is complete
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
             attributes={
-                SemanticConvention.GEN_AI_CONTENT_PROMPT: str(…
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: str(
+                    scope._kwargs.get("input", "")
+                ),
             },
         )
 
@@ -281,8 +480,20 @@ def process_embedding_response(response, request_model, pricing_info, server_por
 
     # Metrics
     if not disable_metrics:
-        record_embedding_metrics(…
-    …
-    …
+        record_embedding_metrics(
+            metrics,
+            SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
+            SemanticConvention.GEN_AI_SYSTEM_LITELLM,
+            scope._server_address,
+            scope._server_port,
+            request_model,
+            scope._response_model,
+            environment,
+            application_name,
+            scope._start_time,
+            scope._end_time,
+            scope._input_tokens,
+            cost,
+        )
 
     return response
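
For orientation, here is a minimal, self-contained sketch of the reworked `format_content` helper as it reads on the new side of the diff above, run against a hypothetical LiteLLM-style message list (the sample messages and URL are illustrative and not part of the package):

```python
# A minimal sketch: the new format_content from this diff, exercised with a
# hypothetical message list to show the flattened prompt string it produces.

def format_content(messages):
    """Process a list of messages to extract content (new side of the diff)."""
    formatted_messages = []
    for message in messages:
        role = message["role"]
        content = message["content"]

        if isinstance(content, list):
            # Multimodal content: render each part as "type: value"
            content_str = ", ".join(
                f"{item['type']}: {item['text'] if 'text' in item else item['image_url']}"
                if "type" in item
                else f"text: {item['text']}"
                for item in content
            )
            formatted_messages.append(f"{role}: {content_str}")
        else:
            formatted_messages.append(f"{role}: {content}")

    return "\n".join(formatted_messages)


messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image."},
            {"type": "image_url", "image_url": "https://example.com/cat.png"},
        ],
    },
]

print(format_content(messages))
# system: You are a helpful assistant.
# user: text: Describe this image., image_url: https://example.com/cat.png
```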