openlit 1.34.30__py3-none-any.whl → 1.34.31__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- openlit/__helpers.py +235 -86
- openlit/__init__.py +16 -13
- openlit/_instrumentors.py +2 -1
- openlit/evals/all.py +50 -21
- openlit/evals/bias_detection.py +47 -20
- openlit/evals/hallucination.py +53 -22
- openlit/evals/toxicity.py +50 -21
- openlit/evals/utils.py +54 -30
- openlit/guard/all.py +61 -19
- openlit/guard/prompt_injection.py +34 -14
- openlit/guard/restrict_topic.py +46 -15
- openlit/guard/sensitive_topic.py +34 -14
- openlit/guard/utils.py +58 -22
- openlit/instrumentation/ag2/__init__.py +24 -8
- openlit/instrumentation/ag2/ag2.py +34 -13
- openlit/instrumentation/ag2/async_ag2.py +34 -13
- openlit/instrumentation/ag2/utils.py +133 -30
- openlit/instrumentation/ai21/__init__.py +43 -14
- openlit/instrumentation/ai21/ai21.py +47 -21
- openlit/instrumentation/ai21/async_ai21.py +47 -21
- openlit/instrumentation/ai21/utils.py +299 -78
- openlit/instrumentation/anthropic/__init__.py +21 -4
- openlit/instrumentation/anthropic/anthropic.py +28 -17
- openlit/instrumentation/anthropic/async_anthropic.py +28 -17
- openlit/instrumentation/anthropic/utils.py +145 -35
- openlit/instrumentation/assemblyai/__init__.py +11 -2
- openlit/instrumentation/assemblyai/assemblyai.py +15 -4
- openlit/instrumentation/assemblyai/utils.py +120 -25
- openlit/instrumentation/astra/__init__.py +43 -10
- openlit/instrumentation/astra/astra.py +28 -5
- openlit/instrumentation/astra/async_astra.py +28 -5
- openlit/instrumentation/astra/utils.py +151 -55
- openlit/instrumentation/azure_ai_inference/__init__.py +43 -10
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +53 -21
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +53 -21
- openlit/instrumentation/azure_ai_inference/utils.py +307 -83
- openlit/instrumentation/bedrock/__init__.py +21 -4
- openlit/instrumentation/bedrock/bedrock.py +63 -25
- openlit/instrumentation/bedrock/utils.py +139 -30
- openlit/instrumentation/chroma/__init__.py +89 -16
- openlit/instrumentation/chroma/chroma.py +28 -6
- openlit/instrumentation/chroma/utils.py +167 -51
- openlit/instrumentation/cohere/__init__.py +63 -18
- openlit/instrumentation/cohere/async_cohere.py +63 -24
- openlit/instrumentation/cohere/cohere.py +63 -24
- openlit/instrumentation/cohere/utils.py +286 -73
- openlit/instrumentation/controlflow/__init__.py +35 -9
- openlit/instrumentation/controlflow/controlflow.py +66 -33
- openlit/instrumentation/crawl4ai/__init__.py +25 -10
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +78 -31
- openlit/instrumentation/crawl4ai/crawl4ai.py +78 -31
- openlit/instrumentation/crewai/__init__.py +40 -15
- openlit/instrumentation/crewai/async_crewai.py +32 -7
- openlit/instrumentation/crewai/crewai.py +32 -7
- openlit/instrumentation/crewai/utils.py +159 -56
- openlit/instrumentation/dynamiq/__init__.py +46 -12
- openlit/instrumentation/dynamiq/dynamiq.py +74 -33
- openlit/instrumentation/elevenlabs/__init__.py +23 -4
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +16 -4
- openlit/instrumentation/elevenlabs/elevenlabs.py +16 -4
- openlit/instrumentation/elevenlabs/utils.py +128 -25
- openlit/instrumentation/embedchain/__init__.py +11 -2
- openlit/instrumentation/embedchain/embedchain.py +68 -35
- openlit/instrumentation/firecrawl/__init__.py +24 -7
- openlit/instrumentation/firecrawl/firecrawl.py +46 -20
- openlit/instrumentation/google_ai_studio/__init__.py +45 -10
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +67 -44
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +67 -44
- openlit/instrumentation/google_ai_studio/utils.py +180 -67
- openlit/instrumentation/gpt4all/__init__.py +22 -7
- openlit/instrumentation/gpt4all/gpt4all.py +67 -29
- openlit/instrumentation/gpt4all/utils.py +285 -61
- openlit/instrumentation/gpu/__init__.py +128 -47
- openlit/instrumentation/groq/__init__.py +21 -4
- openlit/instrumentation/groq/async_groq.py +33 -21
- openlit/instrumentation/groq/groq.py +33 -21
- openlit/instrumentation/groq/utils.py +192 -55
- openlit/instrumentation/haystack/__init__.py +70 -24
- openlit/instrumentation/haystack/async_haystack.py +28 -6
- openlit/instrumentation/haystack/haystack.py +28 -6
- openlit/instrumentation/haystack/utils.py +196 -74
- openlit/instrumentation/julep/__init__.py +69 -19
- openlit/instrumentation/julep/async_julep.py +53 -27
- openlit/instrumentation/julep/julep.py +53 -28
- openlit/instrumentation/langchain/__init__.py +74 -63
- openlit/instrumentation/langchain/callback_handler.py +1100 -0
- openlit/instrumentation/langchain_community/__init__.py +13 -2
- openlit/instrumentation/langchain_community/async_langchain_community.py +23 -5
- openlit/instrumentation/langchain_community/langchain_community.py +23 -5
- openlit/instrumentation/langchain_community/utils.py +35 -9
- openlit/instrumentation/letta/__init__.py +68 -15
- openlit/instrumentation/letta/letta.py +99 -54
- openlit/instrumentation/litellm/__init__.py +43 -14
- openlit/instrumentation/litellm/async_litellm.py +51 -26
- openlit/instrumentation/litellm/litellm.py +51 -26
- openlit/instrumentation/litellm/utils.py +304 -102
- openlit/instrumentation/llamaindex/__init__.py +267 -90
- openlit/instrumentation/llamaindex/async_llamaindex.py +28 -6
- openlit/instrumentation/llamaindex/llamaindex.py +28 -6
- openlit/instrumentation/llamaindex/utils.py +204 -91
- openlit/instrumentation/mem0/__init__.py +11 -2
- openlit/instrumentation/mem0/mem0.py +50 -29
- openlit/instrumentation/milvus/__init__.py +10 -2
- openlit/instrumentation/milvus/milvus.py +31 -6
- openlit/instrumentation/milvus/utils.py +166 -67
- openlit/instrumentation/mistral/__init__.py +63 -18
- openlit/instrumentation/mistral/async_mistral.py +63 -24
- openlit/instrumentation/mistral/mistral.py +63 -24
- openlit/instrumentation/mistral/utils.py +277 -69
- openlit/instrumentation/multion/__init__.py +69 -19
- openlit/instrumentation/multion/async_multion.py +57 -26
- openlit/instrumentation/multion/multion.py +57 -26
- openlit/instrumentation/ollama/__init__.py +39 -18
- openlit/instrumentation/ollama/async_ollama.py +57 -26
- openlit/instrumentation/ollama/ollama.py +57 -26
- openlit/instrumentation/ollama/utils.py +226 -50
- openlit/instrumentation/openai/__init__.py +156 -32
- openlit/instrumentation/openai/async_openai.py +147 -67
- openlit/instrumentation/openai/openai.py +150 -67
- openlit/instrumentation/openai/utils.py +657 -185
- openlit/instrumentation/openai_agents/__init__.py +5 -1
- openlit/instrumentation/openai_agents/processor.py +110 -90
- openlit/instrumentation/phidata/__init__.py +13 -5
- openlit/instrumentation/phidata/phidata.py +67 -32
- openlit/instrumentation/pinecone/__init__.py +48 -9
- openlit/instrumentation/pinecone/async_pinecone.py +27 -5
- openlit/instrumentation/pinecone/pinecone.py +27 -5
- openlit/instrumentation/pinecone/utils.py +153 -47
- openlit/instrumentation/premai/__init__.py +22 -7
- openlit/instrumentation/premai/premai.py +51 -26
- openlit/instrumentation/premai/utils.py +246 -59
- openlit/instrumentation/pydantic_ai/__init__.py +49 -22
- openlit/instrumentation/pydantic_ai/pydantic_ai.py +69 -16
- openlit/instrumentation/pydantic_ai/utils.py +89 -24
- openlit/instrumentation/qdrant/__init__.py +19 -4
- openlit/instrumentation/qdrant/async_qdrant.py +33 -7
- openlit/instrumentation/qdrant/qdrant.py +33 -7
- openlit/instrumentation/qdrant/utils.py +228 -93
- openlit/instrumentation/reka/__init__.py +23 -10
- openlit/instrumentation/reka/async_reka.py +17 -11
- openlit/instrumentation/reka/reka.py +17 -11
- openlit/instrumentation/reka/utils.py +138 -36
- openlit/instrumentation/together/__init__.py +44 -12
- openlit/instrumentation/together/async_together.py +50 -27
- openlit/instrumentation/together/together.py +50 -27
- openlit/instrumentation/together/utils.py +301 -71
- openlit/instrumentation/transformers/__init__.py +2 -1
- openlit/instrumentation/transformers/transformers.py +13 -3
- openlit/instrumentation/transformers/utils.py +139 -36
- openlit/instrumentation/vertexai/__init__.py +81 -16
- openlit/instrumentation/vertexai/async_vertexai.py +33 -15
- openlit/instrumentation/vertexai/utils.py +123 -27
- openlit/instrumentation/vertexai/vertexai.py +33 -15
- openlit/instrumentation/vllm/__init__.py +12 -5
- openlit/instrumentation/vllm/utils.py +121 -31
- openlit/instrumentation/vllm/vllm.py +16 -10
- openlit/otel/events.py +35 -10
- openlit/otel/metrics.py +32 -24
- openlit/otel/tracing.py +24 -9
- openlit/semcov/__init__.py +72 -6
- {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/METADATA +2 -1
- openlit-1.34.31.dist-info/RECORD +166 -0
- openlit/instrumentation/langchain/async_langchain.py +0 -102
- openlit/instrumentation/langchain/langchain.py +0 -102
- openlit/instrumentation/langchain/utils.py +0 -252
- openlit-1.34.30.dist-info/RECORD +0 -168
- {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/LICENSE +0 -0
- {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/WHEEL +0 -0
openlit/instrumentation/litellm/utils.py

```diff
@@ -1,6 +1,7 @@
 """
 LiteLLM OpenTelemetry instrumentation utility functions
 """
+
 import time
 
 from opentelemetry.trace import Status, StatusCode
@@ -17,6 +18,7 @@ from openlit.__helpers import (
 )
 from openlit.semcov import SemanticConvention
 
+
 def format_content(messages):
     """
     Process a list of messages to extract content.
@@ -24,20 +26,22 @@ def format_content(messages):
 
     formatted_messages = []
     for message in messages:
-        role = message[
-        content = message[
+        role = message["role"]
+        content = message["content"]
 
         if isinstance(content, list):
             content_str = ", ".join(
-                f
-                if "type" in item
+                f"{item['type']}: {item['text'] if 'text' in item else item['image_url']}"
+                if "type" in item
+                else f"text: {item['text']}"
                 for item in content
             )
-            formatted_messages.append(f
+            formatted_messages.append(f"{role}: {content_str}")
         else:
-            formatted_messages.append(f
+            formatted_messages.append(f"{role}: {content}")
+
+    return "\n".join(formatted_messages)
 
-    return '\n'.join(formatted_messages)
 
 def process_chunk(scope, chunk):
     """
```
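For reference, the reformatted `format_content` flattens an OpenAI-style `messages` list into one newline-joined string. A minimal sketch of the expected behavior (the sample messages are hypothetical):

```python
# Hypothetical sample input; format_content is the helper shown in the hunk above.
from openlit.instrumentation.litellm.utils import format_content

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image"},
            {"type": "image_url", "image_url": "https://example.com/cat.png"},
        ],
    },
]

print(format_content(messages))
# system: You are a helpful assistant.
# user: text: Describe this image, image_url: https://example.com/cat.png
```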
```diff
@@ -55,45 +59,63 @@ def process_chunk(scope, chunk):
     chunked = response_as_dict(chunk)
 
     # Collect message IDs and aggregated response from events
-    if
-
-
-
+    if len(chunked.get("choices", [])) > 0 and (
+        "delta" in chunked.get("choices")[0]
+        and "content" in chunked.get("choices")[0].get("delta", {})
+    ):
+        content = chunked.get("choices")[0].get("delta").get("content")
         if content:
             scope._llmresponse += content
 
     # Handle tool calls in streaming - optimized
-    delta_tools = chunked.get(
+    delta_tools = chunked.get("choices", [{}])[0].get("delta", {}).get("tool_calls")
     if delta_tools:
         scope._tools = scope._tools or []
 
         for tool in delta_tools:
-            idx = tool.get(
+            idx = tool.get("index", 0)
 
             # Extend list if needed
             scope._tools.extend([{}] * (idx + 1 - len(scope._tools)))
 
-            if tool.get(
-                func = tool.get(
+            if tool.get("id"):  # New tool (id exists)
+                func = tool.get("function", {})
                 scope._tools[idx] = {
-
-
-
+                    "id": tool["id"],
+                    "function": {
+                        "name": func.get("name", ""),
+                        "arguments": func.get("arguments", ""),
+                    },
+                    "type": tool.get("type", "function"),
                 }
-            elif
-                scope._tools[idx]
-
-
-
-
-
-
-                scope.
-                scope.
+            elif (
+                scope._tools[idx] and "function" in tool
+            ):  # Append args (id is None)
+                scope._tools[idx]["function"]["arguments"] += tool["function"].get(
+                    "arguments", ""
+                )
+
+    if chunked.get("usage"):
+        scope._input_tokens = chunked.get("usage").get("prompt_tokens", 0)
+        scope._output_tokens = chunked.get("usage").get("completion_tokens", 0)
+        scope._response_id = chunked.get("id")
+        scope._response_model = chunked.get("model")
+        scope._finish_reason = chunked.get("choices", [{}])[0].get("finish_reason")
+        scope._response_service_tier = str(chunked.get("system_fingerprint", ""))
         scope._end_time = time.time()
 
-
-
+
+def common_chat_logic(
+    scope,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content,
+    disable_metrics,
+    version,
+    is_stream,
+):
     """
     Process chat request and generate Telemetry
     """
```
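The reworked streaming path merges tool-call deltas by `index`: a delta carrying an `id` starts a new tool entry, while id-less deltas append to that entry's `arguments`. A standalone sketch of the merge rule (the streamed delta payloads are hypothetical):

```python
# Standalone sketch of the delta-merge rule from process_chunk above;
# the streamed deltas below are hypothetical.
tools = []
deltas = [
    {"index": 0, "id": "call_1", "type": "function",
     "function": {"name": "get_weather", "arguments": ""}},
    {"index": 0, "function": {"arguments": '{"city": '}},
    {"index": 0, "function": {"arguments": '"Paris"}'}},
]

for tool in deltas:
    idx = tool.get("index", 0)
    tools.extend([{}] * (idx + 1 - len(tools)))  # grow the list to fit the index
    if tool.get("id"):  # an id marks the start of a new tool call
        func = tool.get("function", {})
        tools[idx] = {
            "id": tool["id"],
            "function": {"name": func.get("name", ""),
                         "arguments": func.get("arguments", "")},
            "type": tool.get("type", "function"),
        }
    elif tools[idx] and "function" in tool:  # id-less delta: append arguments
        tools[idx]["function"]["arguments"] += tool["function"].get("arguments", "")

print(tools[0]["function"]["arguments"])  # {"city": "Paris"}
```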
```diff
@@ -101,65 +123,135 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     if len(scope._timestamps) > 1:
         scope._tbt = calculate_tbt(scope._timestamps)
 
-    prompt = format_content(scope._kwargs.get(
-    request_model = scope._kwargs.get(
+    prompt = format_content(scope._kwargs.get("messages", []))
+    request_model = scope._kwargs.get("model", "openai/gpt-4o")
 
-    cost = get_chat_model_cost(
+    cost = get_chat_model_cost(
+        request_model, pricing_info, scope._input_tokens, scope._output_tokens
+    )
 
     # Common Span Attributes
-    common_span_attributes(
-
-
-
+    common_span_attributes(
+        scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+        SemanticConvention.GEN_AI_SYSTEM_LITELLM,
+        scope._server_address,
+        scope._server_port,
+        request_model,
+        scope._response_model,
+        environment,
+        application_name,
+        is_stream,
+        scope._tbt,
+        scope._ttft,
+        version,
+    )
 
     # Helper function to handle None values with proper defaults
     def safe_get(value, default):
         return default if value is None else value
 
     # Span Attributes for Request parameters
-    scope._span.set_attribute(
-
-
-    scope._span.set_attribute(
-
-
-
-    scope._span.set_attribute(
-
-
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_SEED, safe_get(scope._kwargs.get("seed"), "")
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+        safe_get(scope._kwargs.get("frequency_penalty"), 0.0),
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
+        safe_get(scope._kwargs.get("max_tokens"), -1),
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
+        safe_get(scope._kwargs.get("presence_penalty"), 0.0),
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, scope._kwargs.get("stop", [])
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
+        safe_get(scope._kwargs.get("temperature"), 1.0),
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_TOP_P,
+        safe_get(scope._kwargs.get("top_p"), 1.0),
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_USER, safe_get(scope._kwargs.get("user"), "")
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_SERVICE_TIER,
+        safe_get(scope._kwargs.get("service_tier"), "auto"),
+    )
 
     # Span Attributes for Response parameters
     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
-    scope._span.set_attribute(
-
-
-    scope._span.set_attribute(
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason]
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_RESPONSE_SERVICE_TIER, scope._response_service_tier
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
+        scope._response_service_tier,
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_OUTPUT_TYPE,
+        "text" if isinstance(scope._llmresponse, str) else "json",
+    )
 
     # Span Attributes for Cost and Tokens
-    scope._span.set_attribute(
-
-
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
+        scope._input_tokens + scope._output_tokens,
+    )
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
 
     # Span Attributes for Tools - optimized
     if scope._tools:
         tools = scope._tools if isinstance(scope._tools, list) else [scope._tools]
 
-        names, ids, args =
-        (
-
-
-
-
+        names, ids, args = (
+            zip(
+                *[
+                    (
+                        t.get("function", {}).get("name", ""),
+                        str(t.get("id", "")),
+                        str(t.get("function", {}).get("arguments", "")),
+                    )
+                    for t in tools
+                    if isinstance(t, dict) and t
+                ]
+            )
+            if tools
+            else ([], [], [])
+        )
 
-        scope._span.set_attribute(
-
-
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_NAME, ", ".join(filter(None, names))
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_CALL_ID, ", ".join(filter(None, ids))
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_ARGS, ", ".join(filter(None, args))
+        )
 
     # Span Attributes for Content
     if capture_message_content:
         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
-        scope._span.set_attribute(
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse
+        )
 
     # To be removed once the change to span_attributes (from span events) is complete
     scope._span.add_event(
@@ -179,23 +271,69 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
 
     # Metrics
     if not disable_metrics:
-        record_completion_metrics(
-
-
-
+        record_completion_metrics(
+            metrics,
+            SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            SemanticConvention.GEN_AI_SYSTEM_LITELLM,
+            scope._server_address,
+            scope._server_port,
+            request_model,
+            scope._response_model,
+            environment,
+            application_name,
+            scope._start_time,
+            scope._end_time,
+            scope._input_tokens,
+            scope._output_tokens,
+            cost,
+            scope._tbt,
+            scope._ttft,
+        )
+
 
-def process_streaming_chat_response(
-
+def process_streaming_chat_response(
+    scope,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="",
+):
     """
     Process streaming chat request and generate Telemetry
     """
 
-    common_chat_logic(
-
+    common_chat_logic(
+        scope,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+        is_stream=True,
+    )
+
 
-def process_chat_response(
-
-
+def process_chat_response(
+    response,
+    request_model,
+    pricing_info,
+    server_port,
+    server_address,
+    environment,
+    application_name,
+    metrics,
+    start_time,
+    span,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="1.0.0",
+    **kwargs,
+):
     """
     Process chat request and generate Telemetry
     """
```
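The tools block in `common_chat_logic` unzips `(name, id, arguments)` triples in a single pass via `zip(*...)`. A small illustration of that idiom (both tool dicts are hypothetical):

```python
# Illustration of the zip(*) unzip idiom used for the tool span attributes;
# both tool dicts below are hypothetical.
tools = [
    {"id": "call_1", "type": "function",
     "function": {"name": "get_weather", "arguments": '{"city": "Paris"}'}},
    {"id": "call_2", "type": "function",
     "function": {"name": "get_time", "arguments": '{"tz": "CET"}'}},
]

names, ids, args = (
    zip(*[
        (t.get("function", {}).get("name", ""),
         str(t.get("id", "")),
         str(t.get("function", {}).get("arguments", "")))
        for t in tools
        if isinstance(t, dict) and t
    ])
    if tools
    else ([], [], [])
)

print(", ".join(filter(None, names)))  # get_weather, get_time
print(", ".join(filter(None, ids)))    # call_1, call_2
```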
```diff
@@ -211,12 +349,14 @@ def process_chat_response(response, request_model, pricing_info, server_port, se
         (choice.get("message", {}).get("content") or "")
         for choice in response_dict.get("choices", [])
     )
-    scope._input_tokens = response_dict.get(
-    scope._output_tokens = response_dict.get(
-    scope._response_id = response_dict.get(
-    scope._response_model = response_dict.get(
-    scope._finish_reason = str(
-
+    scope._input_tokens = response_dict.get("usage", {}).get("prompt_tokens", 0)
+    scope._output_tokens = response_dict.get("usage", {}).get("completion_tokens", 0)
+    scope._response_id = response_dict.get("id")
+    scope._response_model = response_dict.get("model")
+    scope._finish_reason = str(
+        response_dict.get("choices", [])[0].get("finish_reason", "")
+    )
+    scope._response_service_tier = str(response_dict.get("system_fingerprint", ""))
     scope._timestamps = []
     scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
     scope._server_address, scope._server_port = server_address, server_port
@@ -224,18 +364,43 @@ def process_chat_response(response, request_model, pricing_info, server_port, se
 
     # Handle tool calls
     if scope._kwargs.get("tools"):
-        scope._tools =
+        scope._tools = (
+            response_dict.get("choices", [{}])[0].get("message", {}).get("tool_calls")
+        )
     else:
         scope._tools = None
 
-    common_chat_logic(
-
+    common_chat_logic(
+        scope,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+        is_stream=False,
+    )
 
     return response
 
-
-
-
+
+def process_embedding_response(
+    response,
+    request_model,
+    pricing_info,
+    server_port,
+    server_address,
+    environment,
+    application_name,
+    metrics,
+    start_time,
+    span,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="1.0.0",
+    **kwargs,
+):
     """
     Process embedding request and generate Telemetry
     """
```
```diff
@@ -247,8 +412,8 @@ def process_embedding_response(response, request_model, pricing_info, server_por
     scope._start_time = start_time
     scope._end_time = time.time()
     scope._span = span
-    scope._input_tokens = response_dict.get(
-    scope._response_model = response_dict.get(
+    scope._input_tokens = response_dict.get("usage", {}).get("prompt_tokens", 0)
+    scope._response_model = response_dict.get("model")
     scope._server_address, scope._server_port = server_address, server_port
     scope._kwargs = kwargs
 
@@ -256,33 +421,58 @@ def process_embedding_response(response, request_model, pricing_info, server_por
     cost = get_embed_model_cost(request_model, pricing_info, scope._input_tokens)
 
     # Common Span Attributes
-    common_span_attributes(
-
-
-
+    common_span_attributes(
+        scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
+        SemanticConvention.GEN_AI_SYSTEM_LITELLM,
+        scope._server_address,
+        scope._server_port,
+        request_model,
+        scope._response_model,
+        environment,
+        application_name,
+        False,
+        0,
+        scope._end_time - scope._start_time,
+        version,
+    )
 
     # Helper function to handle None values with proper defaults
     def safe_get(value, default):
         return default if value is None else value
 
     # Span Attributes for Request parameters
-    scope._span.set_attribute(
-
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_ENCODING_FORMATS,
+        [scope._kwargs.get("encoding_format", "float")],
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_USER, safe_get(scope._kwargs.get("user"), "")
+    )
 
     # Span Attributes for Cost and Tokens
-    scope._span.set_attribute(
-
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens
+    )
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
 
     # Span Attributes for Content
     if capture_message_content:
-        scope._span.set_attribute(
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_CONTENT_PROMPT,
+            str(scope._kwargs.get("input", "")),
+        )
 
     # To be removed once the change to span_attributes (from span events) is complete
     scope._span.add_event(
         name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
         attributes={
-            SemanticConvention.GEN_AI_CONTENT_PROMPT: str(
+            SemanticConvention.GEN_AI_CONTENT_PROMPT: str(
+                scope._kwargs.get("input", "")
+            ),
         },
     )
 
@@ -290,8 +480,20 @@ def process_embedding_response(response, request_model, pricing_info, server_por
 
     # Metrics
     if not disable_metrics:
-        record_embedding_metrics(
-
-
+        record_embedding_metrics(
+            metrics,
+            SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
+            SemanticConvention.GEN_AI_SYSTEM_LITELLM,
+            scope._server_address,
+            scope._server_port,
+            request_model,
+            scope._response_model,
+            environment,
+            application_name,
+            scope._start_time,
+            scope._end_time,
+            scope._input_tokens,
+            cost,
+        )
 
     return response
```
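These utilities run inside the LiteLLM instrumentor that `openlit.init()` installs. A minimal usage sketch (the OTLP endpoint and model name are placeholder values):

```python
# Minimal sketch: once openlit.init() runs, litellm.completion() calls are
# traced through the utilities shown in the diff above.
# The endpoint and model below are placeholders.
import openlit
import litellm

openlit.init(otlp_endpoint="http://127.0.0.1:4318")

response = litellm.completion(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.choices[0].message.content)
```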