openlit 1.34.29__py3-none-any.whl → 1.34.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +235 -86
- openlit/__init__.py +16 -13
- openlit/_instrumentors.py +2 -1
- openlit/evals/all.py +50 -21
- openlit/evals/bias_detection.py +47 -20
- openlit/evals/hallucination.py +53 -22
- openlit/evals/toxicity.py +50 -21
- openlit/evals/utils.py +54 -30
- openlit/guard/all.py +61 -19
- openlit/guard/prompt_injection.py +34 -14
- openlit/guard/restrict_topic.py +46 -15
- openlit/guard/sensitive_topic.py +34 -14
- openlit/guard/utils.py +58 -22
- openlit/instrumentation/ag2/__init__.py +24 -8
- openlit/instrumentation/ag2/ag2.py +34 -13
- openlit/instrumentation/ag2/async_ag2.py +34 -13
- openlit/instrumentation/ag2/utils.py +133 -30
- openlit/instrumentation/ai21/__init__.py +43 -14
- openlit/instrumentation/ai21/ai21.py +47 -21
- openlit/instrumentation/ai21/async_ai21.py +47 -21
- openlit/instrumentation/ai21/utils.py +299 -78
- openlit/instrumentation/anthropic/__init__.py +21 -4
- openlit/instrumentation/anthropic/anthropic.py +28 -17
- openlit/instrumentation/anthropic/async_anthropic.py +28 -17
- openlit/instrumentation/anthropic/utils.py +145 -35
- openlit/instrumentation/assemblyai/__init__.py +11 -2
- openlit/instrumentation/assemblyai/assemblyai.py +15 -4
- openlit/instrumentation/assemblyai/utils.py +120 -25
- openlit/instrumentation/astra/__init__.py +43 -10
- openlit/instrumentation/astra/astra.py +28 -5
- openlit/instrumentation/astra/async_astra.py +28 -5
- openlit/instrumentation/astra/utils.py +151 -55
- openlit/instrumentation/azure_ai_inference/__init__.py +43 -10
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +53 -21
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +53 -21
- openlit/instrumentation/azure_ai_inference/utils.py +307 -83
- openlit/instrumentation/bedrock/__init__.py +21 -4
- openlit/instrumentation/bedrock/bedrock.py +63 -25
- openlit/instrumentation/bedrock/utils.py +139 -30
- openlit/instrumentation/chroma/__init__.py +89 -16
- openlit/instrumentation/chroma/chroma.py +28 -6
- openlit/instrumentation/chroma/utils.py +167 -51
- openlit/instrumentation/cohere/__init__.py +63 -18
- openlit/instrumentation/cohere/async_cohere.py +63 -24
- openlit/instrumentation/cohere/cohere.py +63 -24
- openlit/instrumentation/cohere/utils.py +286 -73
- openlit/instrumentation/controlflow/__init__.py +35 -9
- openlit/instrumentation/controlflow/controlflow.py +66 -33
- openlit/instrumentation/crawl4ai/__init__.py +25 -10
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +78 -31
- openlit/instrumentation/crawl4ai/crawl4ai.py +78 -31
- openlit/instrumentation/crewai/__init__.py +111 -24
- openlit/instrumentation/crewai/async_crewai.py +114 -0
- openlit/instrumentation/crewai/crewai.py +104 -131
- openlit/instrumentation/crewai/utils.py +615 -0
- openlit/instrumentation/dynamiq/__init__.py +46 -12
- openlit/instrumentation/dynamiq/dynamiq.py +74 -33
- openlit/instrumentation/elevenlabs/__init__.py +23 -4
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +16 -4
- openlit/instrumentation/elevenlabs/elevenlabs.py +16 -4
- openlit/instrumentation/elevenlabs/utils.py +128 -25
- openlit/instrumentation/embedchain/__init__.py +11 -2
- openlit/instrumentation/embedchain/embedchain.py +68 -35
- openlit/instrumentation/firecrawl/__init__.py +24 -7
- openlit/instrumentation/firecrawl/firecrawl.py +46 -20
- openlit/instrumentation/google_ai_studio/__init__.py +45 -10
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +67 -44
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +67 -44
- openlit/instrumentation/google_ai_studio/utils.py +180 -67
- openlit/instrumentation/gpt4all/__init__.py +22 -7
- openlit/instrumentation/gpt4all/gpt4all.py +67 -29
- openlit/instrumentation/gpt4all/utils.py +285 -61
- openlit/instrumentation/gpu/__init__.py +128 -47
- openlit/instrumentation/groq/__init__.py +21 -4
- openlit/instrumentation/groq/async_groq.py +33 -21
- openlit/instrumentation/groq/groq.py +33 -21
- openlit/instrumentation/groq/utils.py +192 -55
- openlit/instrumentation/haystack/__init__.py +70 -24
- openlit/instrumentation/haystack/async_haystack.py +28 -6
- openlit/instrumentation/haystack/haystack.py +28 -6
- openlit/instrumentation/haystack/utils.py +196 -74
- openlit/instrumentation/julep/__init__.py +69 -19
- openlit/instrumentation/julep/async_julep.py +53 -27
- openlit/instrumentation/julep/julep.py +53 -28
- openlit/instrumentation/langchain/__init__.py +74 -63
- openlit/instrumentation/langchain/callback_handler.py +1100 -0
- openlit/instrumentation/langchain_community/__init__.py +13 -2
- openlit/instrumentation/langchain_community/async_langchain_community.py +23 -5
- openlit/instrumentation/langchain_community/langchain_community.py +23 -5
- openlit/instrumentation/langchain_community/utils.py +35 -9
- openlit/instrumentation/letta/__init__.py +68 -15
- openlit/instrumentation/letta/letta.py +99 -54
- openlit/instrumentation/litellm/__init__.py +43 -14
- openlit/instrumentation/litellm/async_litellm.py +51 -26
- openlit/instrumentation/litellm/litellm.py +51 -26
- openlit/instrumentation/litellm/utils.py +312 -101
- openlit/instrumentation/llamaindex/__init__.py +267 -90
- openlit/instrumentation/llamaindex/async_llamaindex.py +28 -6
- openlit/instrumentation/llamaindex/llamaindex.py +28 -6
- openlit/instrumentation/llamaindex/utils.py +204 -91
- openlit/instrumentation/mem0/__init__.py +11 -2
- openlit/instrumentation/mem0/mem0.py +50 -29
- openlit/instrumentation/milvus/__init__.py +10 -2
- openlit/instrumentation/milvus/milvus.py +31 -6
- openlit/instrumentation/milvus/utils.py +166 -67
- openlit/instrumentation/mistral/__init__.py +63 -18
- openlit/instrumentation/mistral/async_mistral.py +63 -24
- openlit/instrumentation/mistral/mistral.py +63 -24
- openlit/instrumentation/mistral/utils.py +277 -69
- openlit/instrumentation/multion/__init__.py +69 -19
- openlit/instrumentation/multion/async_multion.py +57 -26
- openlit/instrumentation/multion/multion.py +57 -26
- openlit/instrumentation/ollama/__init__.py +39 -18
- openlit/instrumentation/ollama/async_ollama.py +57 -26
- openlit/instrumentation/ollama/ollama.py +57 -26
- openlit/instrumentation/ollama/utils.py +226 -50
- openlit/instrumentation/openai/__init__.py +156 -32
- openlit/instrumentation/openai/async_openai.py +147 -67
- openlit/instrumentation/openai/openai.py +150 -67
- openlit/instrumentation/openai/utils.py +660 -186
- openlit/instrumentation/openai_agents/__init__.py +6 -2
- openlit/instrumentation/openai_agents/processor.py +409 -537
- openlit/instrumentation/phidata/__init__.py +13 -5
- openlit/instrumentation/phidata/phidata.py +67 -32
- openlit/instrumentation/pinecone/__init__.py +48 -9
- openlit/instrumentation/pinecone/async_pinecone.py +27 -5
- openlit/instrumentation/pinecone/pinecone.py +27 -5
- openlit/instrumentation/pinecone/utils.py +153 -47
- openlit/instrumentation/premai/__init__.py +22 -7
- openlit/instrumentation/premai/premai.py +51 -26
- openlit/instrumentation/premai/utils.py +246 -59
- openlit/instrumentation/pydantic_ai/__init__.py +49 -22
- openlit/instrumentation/pydantic_ai/pydantic_ai.py +69 -16
- openlit/instrumentation/pydantic_ai/utils.py +89 -24
- openlit/instrumentation/qdrant/__init__.py +19 -4
- openlit/instrumentation/qdrant/async_qdrant.py +33 -7
- openlit/instrumentation/qdrant/qdrant.py +33 -7
- openlit/instrumentation/qdrant/utils.py +228 -93
- openlit/instrumentation/reka/__init__.py +23 -10
- openlit/instrumentation/reka/async_reka.py +17 -11
- openlit/instrumentation/reka/reka.py +17 -11
- openlit/instrumentation/reka/utils.py +138 -36
- openlit/instrumentation/together/__init__.py +44 -12
- openlit/instrumentation/together/async_together.py +50 -27
- openlit/instrumentation/together/together.py +50 -27
- openlit/instrumentation/together/utils.py +301 -71
- openlit/instrumentation/transformers/__init__.py +2 -1
- openlit/instrumentation/transformers/transformers.py +13 -3
- openlit/instrumentation/transformers/utils.py +139 -36
- openlit/instrumentation/vertexai/__init__.py +81 -16
- openlit/instrumentation/vertexai/async_vertexai.py +33 -15
- openlit/instrumentation/vertexai/utils.py +123 -27
- openlit/instrumentation/vertexai/vertexai.py +33 -15
- openlit/instrumentation/vllm/__init__.py +12 -5
- openlit/instrumentation/vllm/utils.py +121 -31
- openlit/instrumentation/vllm/vllm.py +16 -10
- openlit/otel/events.py +35 -10
- openlit/otel/metrics.py +32 -24
- openlit/otel/tracing.py +24 -9
- openlit/semcov/__init__.py +101 -7
- {openlit-1.34.29.dist-info → openlit-1.34.31.dist-info}/METADATA +2 -1
- openlit-1.34.31.dist-info/RECORD +166 -0
- openlit/instrumentation/langchain/async_langchain.py +0 -102
- openlit/instrumentation/langchain/langchain.py +0 -102
- openlit/instrumentation/langchain/utils.py +0 -252
- openlit-1.34.29.dist-info/RECORD +0 -166
- {openlit-1.34.29.dist-info → openlit-1.34.31.dist-info}/LICENSE +0 -0
- {openlit-1.34.29.dist-info → openlit-1.34.31.dist-info}/WHEEL +0 -0
openlit/instrumentation/cohere/cohere.py

@@ -16,8 +16,17 @@ from openlit.instrumentation.cohere.utils import (
 )
 from openlit.semcov import SemanticConvention
 
-def chat(version, environment, application_name,
-    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+
+def chat(
+    version,
+    environment,
+    application_name,
+    tracer,
+    pricing_info,
+    capture_message_content,
+    metrics,
+    disable_metrics,
+):
     """
     Generates a telemetry wrapper for GenAI chat function call
     """
@@ -27,7 +36,9 @@ def chat(version, environment, application_name,
         Wraps the GenAI chat function call.
         """
 
-        server_address, server_port = set_server_address_and_port(instance, "api.cohere.com", 443)
+        server_address, server_port = set_server_address_and_port(
+            instance, "api.cohere.com", 443
+        )
         request_model = kwargs.get("model", "command-r-plus-08-2024")
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
@@ -49,15 +60,24 @@ def chat(version, environment, application_name,
                 capture_message_content=capture_message_content,
                 disable_metrics=disable_metrics,
                 version=version,
-                **kwargs
+                **kwargs,
             )
 
         return response
 
     return wrapper
 
-def chat_stream(version, environment, application_name,
-    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+
+def chat_stream(
+    version,
+    environment,
+    application_name,
+    tracer,
+    pricing_info,
+    capture_message_content,
+    metrics,
+    disable_metrics,
+):
     """
     Generates a telemetry wrapper for GenAI chat_stream function call
     """
@@ -68,15 +88,15 @@ def chat_stream(version, environment, application_name,
         """
 
         def __init__(
-                self,
-                wrapped,
-                span,
-                span_name,
-                kwargs,
-                server_address,
-                server_port,
-                **args,
-            ):
+            self,
+            wrapped,
+            span,
+            span_name,
+            kwargs,
+            server_address,
+            server_port,
+            **args,
+        ):
             self.__wrapped__ = wrapped
             self._span = span
             self._span_name = span_name
@@ -120,7 +140,9 @@ def chat_stream(version, environment, application_name,
                 return chunk
             except StopIteration:
                 try:
-                    with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
+                    with tracer.start_as_current_span(
+                        self._span_name, kind=SpanKind.CLIENT
+                    ) as self._span:
                         process_streaming_chat_response(
                             self,
                             pricing_info=pricing_info,
@@ -129,7 +151,7 @@ def chat_stream(version, environment, application_name,
                             metrics=metrics,
                             capture_message_content=capture_message_content,
                             disable_metrics=disable_metrics,
-                            version=version
+                            version=version,
                         )
 
                 except Exception as e:
@@ -142,7 +164,9 @@ def chat_stream(version, environment, application_name,
         Wraps the GenAI chat_stream function call.
         """
 
-        server_address, server_port = set_server_address_and_port(instance, "api.cohere.com", 443)
+        server_address, server_port = set_server_address_and_port(
+            instance, "api.cohere.com", 443
+        )
         request_model = kwargs.get("model", "command-r-plus-08-2024")
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
@@ -151,12 +175,23 @@ def chat_stream(version, environment, application_name,
         awaited_wrapped = wrapped(*args, **kwargs)
         span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-        return TracedSyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
+        return TracedSyncStream(
+            awaited_wrapped, span, span_name, kwargs, server_address, server_port
+        )
 
     return wrapper
 
-def embed(version, environment, application_name,
-    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+
+def embed(
+    version,
+    environment,
+    application_name,
+    tracer,
+    pricing_info,
+    capture_message_content,
+    metrics,
+    disable_metrics,
+):
     """
     Generates a telemetry wrapper for GenAI embedding function call
     """
@@ -166,10 +201,14 @@ def embed(version, environment, application_name,
        Wraps the GenAI embedding function call.
        """
 
-        server_address, server_port = set_server_address_and_port(instance, "api.cohere.com", 443)
+        server_address, server_port = set_server_address_and_port(
+            instance, "api.cohere.com", 443
+        )
         request_model = kwargs.get("model", "embed-english-v3.0")
 
-        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+        span_name = (
+            f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+        )
 
         with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
             start_time = time.time()
@@ -190,7 +229,7 @@ def embed(version, environment, application_name,
                     capture_message_content=capture_message_content,
                     disable_metrics=disable_metrics,
                     version=version,
-                    **kwargs
+                    **kwargs,
                 )
 
             except Exception as e:
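For orientation, `chat`, `chat_stream`, and `embed` above all follow the wrapper-factory shape openlit uses across its instrumentors: the outer function closes over the tracer and configuration, and the returned `wrapper(wrapped, instance, args, kwargs)` is what gets patched over the SDK method. A minimal sketch of that shape, under the assumption of the wrapt-style signature visible in the diff; the attribute keys and span body are illustrative stand-ins, not openlit's actual `SemanticConvention` helpers:

```python
import time
from opentelemetry.trace import SpanKind


def chat(version, environment, application_name, tracer, pricing_info,
         capture_message_content, metrics, disable_metrics):
    """Build a telemetry wrapper; the instrumentor patches it over the SDK's chat."""

    def wrapper(wrapped, instance, args, kwargs):
        # wrapt-style: `wrapped` is the original method, `instance` the client object
        request_model = kwargs.get("model", "command-r-plus-08-2024")
        span_name = f"chat {request_model}"
        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
            start_time = time.time()
            response = wrapped(*args, **kwargs)  # call through to the real client
            # Illustrative keys only; openlit sets its semantic-convention attributes here
            span.set_attribute("gen_ai.request.model", request_model)
            span.set_attribute("gen_ai.client.duration", time.time() - start_time)
            return response

    return wrapper
```

The design point the diff preserves: all telemetry state (pricing, metrics, flags) is captured once in the closure, so the hot-path wrapper takes only the call it intercepts.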
openlit/instrumentation/cohere/utils.py

@@ -1,6 +1,7 @@
 """
 Cohere OpenTelemetry instrumentation utility functions
 """
+
 import time
 
 from opentelemetry.trace import Status, StatusCode
@@ -17,6 +18,7 @@ from openlit.__helpers import (
 )
 from openlit.semcov import SemanticConvention
 
+
 def format_content(messages):
     """
     Process a list of messages to extract content.
@@ -35,8 +37,9 @@ def format_content(messages):
 
         if isinstance(content, list):
             content_str = ", ".join(
-                f"{item['type']}: {item['text'] if 'text' in item else item.get('image_url', '')}"
-                if "type" in item else f"text: {item.get('text', '')}"
+                f"{item['type']}: {item['text'] if 'text' in item else item.get('image_url', '')}"
+                if "type" in item
+                else f"text: {item.get('text', '')}"
                 for item in content
             )
             formatted_messages.append(f"{role}: {content_str}")
@@ -45,6 +48,7 @@ def format_content(messages):
 
     return "\n".join(formatted_messages)
 
+
 def process_chunk(scope, chunk):
     """
     Process a chunk of response data and update state.
@@ -65,13 +69,17 @@ def process_chunk(scope, chunk):
         scope._response_id = chunked.get("id")
 
     if chunked.get("type") == "content-delta":
-        content = chunked.get("delta", {}).get("message", {}).get("content", {}).get("text")
+        content = (
+            chunked.get("delta", {}).get("message", {}).get("content", {}).get("text")
+        )
         if content:
             scope._llmresponse += content
 
     # Handle tool plan deltas
     if chunked.get("type") == "tool-plan-delta":
-        tool_plan_text = chunked.get("delta", {}).get("message", {}).get("tool_plan", "")
+        tool_plan_text = (
+            chunked.get("delta", {}).get("message", {}).get("tool_plan", "")
+        )
         if tool_plan_text:
             if not hasattr(scope, "_tool_plan"):
                 scope._tool_plan = ""
@@ -94,8 +102,8 @@ def process_chunk(scope, chunk):
                     "type": tool_call.get("type", "function"),
                     "function": {
                         "name": tool_call.get("function", {}).get("name", ""),
-                        "arguments": ""
-                    }
+                        "arguments": "",
+                    },
                 }
 
     # Handle tool call deltas (arguments)
@@ -103,7 +111,13 @@ def process_chunk(scope, chunk):
         if hasattr(scope, "_tools") and scope._tools:
             index = chunked.get("index", 0)
             if index < len(scope._tools):
-                arguments = chunked.get("delta", {}).get("message", {}).get("tool_calls", {}).get("function", {}).get("arguments", "")
+                arguments = (
+                    chunked.get("delta", {})
+                    .get("message", {})
+                    .get("tool_calls", {})
+                    .get("function", {})
+                    .get("arguments", "")
+                )
                 if arguments:
                     scope._tools[index]["function"]["arguments"] += arguments
 
@@ -115,8 +129,18 @@ def process_chunk(scope, chunk):
         scope._output_tokens = usage.get("output_tokens", 0)
         scope._end_time = time.time()
 
-def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
-    capture_message_content, disable_metrics, version, is_stream):
+
+def common_chat_logic(
+    scope,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content,
+    disable_metrics,
+    version,
+    is_stream,
+):
     """
     Process chat request and generate Telemetry
     """
@@ -127,58 +151,123 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
     prompt = format_content(scope._kwargs.get("messages", []))
     request_model = scope._kwargs.get("model", "command-r-plus-08-2024")
 
-    cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
+    cost = get_chat_model_cost(
+        request_model, pricing_info, scope._input_tokens, scope._output_tokens
+    )
 
     # Common Span Attributes
-    common_span_attributes(scope, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-        SemanticConvention.GEN_AI_SYSTEM_COHERE, scope._server_address, scope._server_port,
-        request_model, scope._response_model, environment, application_name,
-        is_stream, scope._tbt, scope._ttft, version)
+    common_span_attributes(
+        scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+        SemanticConvention.GEN_AI_SYSTEM_COHERE,
+        scope._server_address,
+        scope._server_port,
+        request_model,
+        scope._response_model,
+        environment,
+        application_name,
+        is_stream,
+        scope._tbt,
+        scope._ttft,
+        version,
+    )
 
     # Span Attributes for Request parameters
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED, scope._kwargs.get("seed", ""))
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, scope._kwargs.get("frequency_penalty", 0.0))
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, scope._kwargs.get("max_tokens", -1))
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, scope._kwargs.get("presence_penalty", 0.0))
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, scope._kwargs.get("stop_sequences", []))
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get("temperature", 0.3))
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K, scope._kwargs.get("k", 1.0))
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, scope._kwargs.get("p", 1.0))
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_SEED, scope._kwargs.get("seed", "")
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+        scope._kwargs.get("frequency_penalty", 0.0),
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
+        scope._kwargs.get("max_tokens", -1),
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
+        scope._kwargs.get("presence_penalty", 0.0),
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
+        scope._kwargs.get("stop_sequences", []),
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
+        scope._kwargs.get("temperature", 0.3),
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_TOP_K, scope._kwargs.get("k", 1.0)
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_TOP_P, scope._kwargs.get("p", 1.0)
+    )
 
     # Span Attributes for Response parameters
     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason]
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_OUTPUT_TYPE,
+        "text" if isinstance(scope._llmresponse, str) else "json",
+    )
 
     # Span Attributes for Cost and Tokens
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
+        scope._input_tokens + scope._output_tokens,
+    )
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
 
     # Span Attributes for Tools - optimized
     if scope._tools:
         tools = scope._tools if isinstance(scope._tools, list) else [scope._tools]
 
-        names, ids, args = zip(
-            *[(t.get("function", {}).get("name", ""),
-               str(t.get("id", "")),
-               str(t.get("function", {}).get("arguments", "")))
-              for t in tools if isinstance(t, dict) and t]
-        ) if tools else ([], [], [])
+        names, ids, args = (
+            zip(
+                *[
+                    (
+                        t.get("function", {}).get("name", ""),
+                        str(t.get("id", "")),
+                        str(t.get("function", {}).get("arguments", "")),
+                    )
+                    for t in tools
+                    if isinstance(t, dict) and t
+                ]
+            )
+            if tools
+            else ([], [], [])
+        )
 
-        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_NAME, ", ".join(filter(None, names)))
-        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALL_ID, ", ".join(filter(None, ids)))
-        scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_ARGS, ", ".join(filter(None, args)))
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_NAME, ", ".join(filter(None, names))
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_CALL_ID, ", ".join(filter(None, ids))
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_TOOL_ARGS, ", ".join(filter(None, args))
+        )
 
     # Span Attributes for Tool Plan (Cohere specific)
     if hasattr(scope, "_tool_plan") and scope._tool_plan:
-        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_REASONING, scope._tool_plan)
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_CONTENT_REASONING, scope._tool_plan
+        )
 
     # Span Attributes for Content
     if capture_message_content:
         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
-        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse
+        )
 
         # To be removed once the change to span_attributes (from span events) is complete
         scope._span.add_event(
@@ -198,23 +287,69 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
 
     # Metrics
     if not disable_metrics:
-        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-            SemanticConvention.GEN_AI_SYSTEM_COHERE, scope._server_address, scope._server_port,
-            request_model, scope._response_model, environment, application_name, scope._start_time,
-            scope._end_time, scope._input_tokens, scope._output_tokens, cost, scope._tbt, scope._ttft)
+        record_completion_metrics(
+            metrics,
+            SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            SemanticConvention.GEN_AI_SYSTEM_COHERE,
+            scope._server_address,
+            scope._server_port,
+            request_model,
+            scope._response_model,
+            environment,
+            application_name,
+            scope._start_time,
+            scope._end_time,
+            scope._input_tokens,
+            scope._output_tokens,
+            cost,
+            scope._tbt,
+            scope._ttft,
+        )
 
-def process_streaming_chat_response(scope, pricing_info, environment, application_name, metrics,
-    capture_message_content=False, disable_metrics=False, version=""):
+
+def process_streaming_chat_response(
+    scope,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="",
+):
     """
     Process streaming chat request and generate Telemetry
     """
 
-    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
-        capture_message_content, disable_metrics, version, is_stream=True)
-
-def process_chat_response(response, request_model, pricing_info, server_port, server_address,
-    environment, application_name, metrics, start_time, span,
-    capture_message_content=False, disable_metrics=False, version="1.0.0", **kwargs):
+    common_chat_logic(
+        scope,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+        is_stream=True,
+    )
+
+
+def process_chat_response(
+    response,
+    request_model,
+    pricing_info,
+    server_port,
+    server_address,
+    environment,
+    application_name,
+    metrics,
+    start_time,
+    span,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="1.0.0",
+    **kwargs,
+):
     """
     Process chat request and generate Telemetry
     """
@@ -235,8 +370,12 @@ def process_chat_response(response, request_model, pricing_info, server_port, server_address,
     scope._llmresponse = ""
     scope._response_id = response_dict.get("id")
     scope._response_model = request_model
-    scope._input_tokens = response_dict.get("usage", {}).get("billed_units", {}).get("input_tokens", 0)
-    scope._output_tokens = response_dict.get("usage", {}).get("billed_units", {}).get("output_tokens", 0)
+    scope._input_tokens = (
+        response_dict.get("usage", {}).get("billed_units", {}).get("input_tokens", 0)
+    )
+    scope._output_tokens = (
+        response_dict.get("usage", {}).get("billed_units", {}).get("output_tokens", 0)
+    )
     scope._timestamps = []
     scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
     scope._server_address, scope._server_port = server_address, server_port
@@ -252,13 +391,31 @@ def process_chat_response(response, request_model, pricing_info, server_port, server_address,
     scope._tools = None
     scope._tool_plan = ""
 
-    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
-        capture_message_content, disable_metrics, version, is_stream=False)
+    common_chat_logic(
+        scope,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+        is_stream=False,
+    )
 
     return response
 
-def common_embedding_logic(scope, pricing_info, environment, application_name, metrics,
-    capture_message_content, disable_metrics, version):
+
+def common_embedding_logic(
+    scope,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content,
+    disable_metrics,
+    version,
+):
     """
     Process embedding request and generate Telemetry
     """
@@ -269,19 +426,39 @@ def common_embedding_logic(scope, pricing_info, environment, application_name, metrics,
     cost = get_embed_model_cost(request_model, pricing_info, scope._input_tokens)
 
     # Common Span Attributes
-    common_span_attributes(scope, SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
-        SemanticConvention.GEN_AI_SYSTEM_COHERE, scope._server_address, scope._server_port,
-        request_model, scope._response_model, environment, application_name,
-        False, 0, scope._ttft, version)
+    common_span_attributes(
+        scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
+        SemanticConvention.GEN_AI_SYSTEM_COHERE,
+        scope._server_address,
+        scope._server_port,
+        request_model,
+        scope._response_model,
+        environment,
+        application_name,
+        False,
+        0,
+        scope._ttft,
+        version,
+    )
 
     # Span Attributes for Request parameters
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_ENCODING_FORMATS, scope._kwargs.get("embedding_types", ["float"]))
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_REQUEST_ENCODING_FORMATS,
+        scope._kwargs.get("embedding_types", ["float"]),
+    )
 
     # Span Attributes for Cost and Tokens
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens)
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens
+    )
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, scope._response_type)
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_OUTPUT_TYPE, scope._response_type
+    )
 
     # Span Attributes for Content
     if capture_message_content:
@@ -299,13 +476,39 @@ def common_embedding_logic(scope, pricing_info, environment, application_name, metrics,
 
     # Metrics
     if not disable_metrics:
-        record_embedding_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
-            SemanticConvention.GEN_AI_SYSTEM_COHERE, scope._server_address, scope._server_port,
-            request_model, scope._response_model, environment, application_name, scope._start_time, scope._end_time, scope._input_tokens, cost)
+        record_embedding_metrics(
+            metrics,
+            SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
+            SemanticConvention.GEN_AI_SYSTEM_COHERE,
+            scope._server_address,
+            scope._server_port,
+            request_model,
+            scope._response_model,
+            environment,
+            application_name,
+            scope._start_time,
+            scope._end_time,
+            scope._input_tokens,
+            cost,
+        )
+
 
-def process_embedding_response(response, request_model, pricing_info, server_port, server_address,
-    environment, application_name, metrics, start_time, span,
-    capture_message_content=False, disable_metrics=False, version="1.0.0", **kwargs):
+def process_embedding_response(
+    response,
+    request_model,
+    pricing_info,
+    server_port,
+    server_address,
+    environment,
+    application_name,
+    metrics,
+    start_time,
+    span,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="1.0.0",
+    **kwargs,
+):
     """
     Process embedding request and generate Telemetry
     """
@@ -317,14 +520,24 @@ def process_embedding_response(response, request_model, pricing_info, server_port, server_address,
     scope._start_time = start_time
     scope._end_time = time.time()
     scope._span = span
-    scope._input_tokens = response_dict.get("meta", {}).get("billed_units", {}).get("input_tokens", 0)
+    scope._input_tokens = (
+        response_dict.get("meta", {}).get("billed_units", {}).get("input_tokens", 0)
+    )
     scope._response_model = request_model
     scope._response_type = response_dict.get("response_type", "")
     scope._ttft = scope._end_time - scope._start_time
    scope._server_address, scope._server_port = server_address, server_port
     scope._kwargs = kwargs
 
-    common_embedding_logic(scope, pricing_info, environment, application_name, metrics,
-        capture_message_content, disable_metrics, version)
+    common_embedding_logic(
+        scope,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+    )
 
     return response
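The `process_chunk` changes above are formatting-only, but the logic is worth seeing in isolation: each Cohere v2 stream event is a typed delta that the telemetry scope accumulates. A toy reproduction of just the text and tool-plan branches, with a stand-in `Scope` object (the real scope also tracks tokens, tool calls, and timestamps); the event shapes mirror the ones the diff reads:

```python
# Stand-in for openlit's internal state object; illustrative, not the library API.
class Scope:
    def __init__(self):
        self._llmresponse = ""  # accumulated assistant text
        self._tool_plan = ""    # accumulated tool plan (Cohere specific)


def process_chunk(scope, chunked):
    # "content-delta" events carry incremental assistant text
    if chunked.get("type") == "content-delta":
        content = (
            chunked.get("delta", {}).get("message", {}).get("content", {}).get("text")
        )
        if content:
            scope._llmresponse += content

    # "tool-plan-delta" events carry incremental tool-plan text
    if chunked.get("type") == "tool-plan-delta":
        tool_plan_text = (
            chunked.get("delta", {}).get("message", {}).get("tool_plan", "")
        )
        if tool_plan_text:
            scope._tool_plan += tool_plan_text


scope = Scope()
for event in (
    {"type": "content-delta", "delta": {"message": {"content": {"text": "Hel"}}}},
    {"type": "content-delta", "delta": {"message": {"content": {"text": "lo"}}}},
):
    process_chunk(scope, event)

print(scope._llmresponse)  # -> Hello
```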