openlit 1.33.9__py3-none-any.whl → 1.33.10__py3-none-any.whl
This diff shows the publicly released contents of the two package versions as they appear in their public registry. It is provided for informational purposes only and reflects the changes between the versions as published.
- openlit/__helpers.py +5 -0
- openlit/__init__.py +3 -2
- openlit/instrumentation/ag2/ag2.py +3 -3
- openlit/instrumentation/ai21/ai21.py +1 -1
- openlit/instrumentation/ai21/async_ai21.py +1 -1
- openlit/instrumentation/anthropic/anthropic.py +1 -1
- openlit/instrumentation/anthropic/async_anthropic.py +1 -1
- openlit/instrumentation/astra/astra.py +5 -5
- openlit/instrumentation/astra/async_astra.py +5 -5
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +3 -3
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +3 -3
- openlit/instrumentation/chroma/chroma.py +5 -5
- openlit/instrumentation/cohere/async_cohere.py +1 -1
- openlit/instrumentation/cohere/cohere.py +2 -2
- openlit/instrumentation/controlflow/controlflow.py +3 -3
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +3 -3
- openlit/instrumentation/crawl4ai/crawl4ai.py +3 -3
- openlit/instrumentation/crewai/crewai.py +4 -2
- openlit/instrumentation/dynamiq/dynamiq.py +3 -3
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +1 -2
- openlit/instrumentation/elevenlabs/elevenlabs.py +1 -2
- openlit/instrumentation/embedchain/embedchain.py +5 -5
- openlit/instrumentation/firecrawl/firecrawl.py +3 -3
- openlit/instrumentation/gpt4all/__init__.py +2 -2
- openlit/instrumentation/gpt4all/gpt4all.py +345 -220
- openlit/instrumentation/gpu/__init__.py +5 -5
- openlit/instrumentation/groq/__init__.py +2 -2
- openlit/instrumentation/groq/async_groq.py +356 -240
- openlit/instrumentation/groq/groq.py +356 -240
- openlit/instrumentation/haystack/haystack.py +3 -3
- openlit/instrumentation/julep/async_julep.py +3 -3
- openlit/instrumentation/julep/julep.py +3 -3
- openlit/instrumentation/langchain/__init__.py +13 -7
- openlit/instrumentation/langchain/async_langchain.py +384 -0
- openlit/instrumentation/langchain/langchain.py +98 -490
- openlit/instrumentation/letta/letta.py +5 -3
- openlit/instrumentation/litellm/__init__.py +4 -5
- openlit/instrumentation/litellm/async_litellm.py +316 -245
- openlit/instrumentation/litellm/litellm.py +312 -241
- openlit/instrumentation/llamaindex/llamaindex.py +3 -3
- openlit/instrumentation/mem0/mem0.py +3 -3
- openlit/instrumentation/milvus/milvus.py +5 -5
- openlit/instrumentation/mistral/__init__.py +6 -6
- openlit/instrumentation/mistral/async_mistral.py +421 -248
- openlit/instrumentation/mistral/mistral.py +418 -244
- openlit/instrumentation/multion/async_multion.py +4 -2
- openlit/instrumentation/multion/multion.py +4 -2
- openlit/instrumentation/ollama/__init__.py +8 -30
- openlit/instrumentation/ollama/async_ollama.py +385 -417
- openlit/instrumentation/ollama/ollama.py +384 -417
- openlit/instrumentation/openai/async_openai.py +7 -9
- openlit/instrumentation/openai/openai.py +7 -9
- openlit/instrumentation/phidata/phidata.py +4 -2
- openlit/instrumentation/pinecone/pinecone.py +5 -5
- openlit/instrumentation/premai/__init__.py +2 -2
- openlit/instrumentation/premai/premai.py +262 -213
- openlit/instrumentation/qdrant/async_qdrant.py +5 -5
- openlit/instrumentation/qdrant/qdrant.py +5 -5
- openlit/instrumentation/reka/__init__.py +2 -2
- openlit/instrumentation/reka/async_reka.py +90 -52
- openlit/instrumentation/reka/reka.py +90 -52
- openlit/instrumentation/together/__init__.py +4 -4
- openlit/instrumentation/together/async_together.py +278 -236
- openlit/instrumentation/together/together.py +278 -236
- openlit/instrumentation/transformers/__init__.py +1 -1
- openlit/instrumentation/transformers/transformers.py +75 -44
- openlit/instrumentation/vertexai/__init__.py +14 -64
- openlit/instrumentation/vertexai/async_vertexai.py +329 -986
- openlit/instrumentation/vertexai/vertexai.py +329 -986
- openlit/instrumentation/vllm/__init__.py +1 -1
- openlit/instrumentation/vllm/vllm.py +62 -32
- openlit/semcov/__init__.py +3 -3
- {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
- openlit-1.33.10.dist-info/RECORD +122 -0
- openlit-1.33.9.dist-info/RECORD +0 -121
- {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
- {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/WHEEL +0 -0
openlit/instrumentation/mistral/mistral.py

@@ -1,29 +1,37 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
 """
 Module for monitoring Mistral API calls.
 """

 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
-from openlit.__helpers import
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from openlit.__helpers import (
+    get_chat_model_cost,
+    get_embed_model_cost,
+    handle_exception,
+    response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
+)
 from openlit.semcov import SemanticConvetion

 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)

-def chat(gen_ai_endpoint, version, environment, application_name,
-
+def chat(version, environment, application_name, tracer,
+         pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the
+        application_name: Name of the application using the Mistral API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of
+        pricing_info: Information used for calculating the cost of Mistral usage.
         trace_content: Flag indicating whether to trace the actual content.

     Returns:
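The new factory signature drops the old gen_ai_endpoint argument, so the hook registration in openlit/instrumentation/mistral/__init__.py (also changed in this release, not shown here) passes one argument fewer. A minimal sketch of that wiring, assuming wrapt's wrap_function_wrapper; the 'mistralai.chat' / 'Chat.complete' target below is a placeholder for illustration, not the exact path openlit uses:

from wrapt import wrap_function_wrapper
from openlit.instrumentation.mistral.mistral import chat

def instrument_mistral_chat(version, environment, application_name, tracer,
                            pricing_info, trace_content, metrics, disable_metrics):
    # Attach the telemetry wrapper returned by the chat() factory.
    # The module/method names here are illustrative placeholders only.
    wrap_function_wrapper(
        'mistralai.chat',
        'Chat.complete',
        chat(version, environment, application_name, tracer,
             pricing_info, trace_content, metrics, disable_metrics),
    )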
@@ -47,70 +55,95 @@ def chat(gen_ai_endpoint, version, environment, application_name,
             The response from the original 'chat' method.
         """

-
+        server_address, server_port = set_server_address_and_port(instance, 'api.mistral.ai', 443)
+        request_model = kwargs.get('model', 'mistral-small-latest')
+
+        span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}'
+
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
+            end_time = time.time()
+
+            response_dict = response_as_dict(response)

             try:
                 # Format 'messages' into a single string
-                message_prompt = kwargs.get('messages',
+                message_prompt = kwargs.get('messages', '')
                 formatted_messages = []
                 for message in message_prompt:
-                    role = message[
-                    content = message[
+                    role = message['role']
+                    content = message['content']

                     if isinstance(content, list):
                         content_str = ", ".join(
-
-
-                            if 'type' in item else f"text: {item['text']}"
+                            f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                            if "type" in item else f'text: {item["text"]}'
                             for item in content
                         )
-                        formatted_messages.append(f
+                        formatted_messages.append(f'{role}: {content_str}')
                     else:
-                        formatted_messages.append(f
-                prompt =
+                        formatted_messages.append(f'{role}: {content}')
+                prompt = '\n'.join(formatted_messages)
+
+                input_tokens = response_dict.get('usage').get('prompt_tokens')
+                output_tokens = response_dict.get('usage').get('completion_tokens')

                 # Calculate cost of the operation
-                cost = get_chat_model_cost(
-                                            pricing_info,
-
+                cost = get_chat_model_cost(request_model,
+                                            pricing_info, input_tokens,
+                                            output_tokens)

-                # Set
-                span.set_attribute(TELEMETRY_SDK_NAME,
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                    SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
+                # Set base span attribues (OTel Semconv)
+                span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
                 span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                     SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                span.set_attribute(SemanticConvetion.
-
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                    response.id)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                    application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                    SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-
+                                    request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                    kwargs.get('seed', ''))
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                    server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                    kwargs.get('frequency_penalty', 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                    kwargs.get('max_tokens', -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                    kwargs.get('presence_penalty', 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                    kwargs.get('stop', []))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                    kwargs.get(
+                                    kwargs.get('temperature', 1.0))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                    kwargs.get(
-                span.set_attribute(SemanticConvetion.
-
-                span.set_attribute(SemanticConvetion.
-
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                    False)
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                    [response.choices[0].finish_reason])
+                                    kwargs.get('top_p', 1.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                    response_dict.get('id'))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                    response_dict.get('model'))
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-
+                                    input_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-
+                                    output_tokens)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                    server_address)
+
+                # Set base span attribues (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                    environment)
+                span.set_attribute(SERVICE_NAME,
+                                    application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                    False)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-
+                                    input_tokens + output_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                    end_time - start_time)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                    version)
                 if trace_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
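The non-streaming path now derives input_tokens and output_tokens from response_as_dict(response) and prices them with get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens). A rough sketch of what such a lookup can do; the pricing_info layout below (per-1k-token prompt/completion prices keyed by model) is an assumption for illustration, not openlit's actual schema:

def get_chat_model_cost_sketch(model, pricing_info, prompt_tokens, completion_tokens):
    # Assumed layout: pricing_info['chat'][model] = {'promptPrice': ..., 'completionPrice': ...}
    prices = pricing_info.get('chat', {}).get(model, {})
    prompt_cost = (prompt_tokens / 1000) * prices.get('promptPrice', 0.0)
    completion_cost = (completion_tokens / 1000) * prices.get('completionPrice', 0.0)
    return prompt_cost + completion_cost

# 1,500 prompt tokens at $0.001/1k plus 200 completion tokens at $0.003/1k -> 0.0021
cost = get_chat_model_cost_sketch(
    'mistral-small-latest',
    {'chat': {'mistral-small-latest': {'promptPrice': 0.001, 'completionPrice': 0.003}}},
    1500, 200,
)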
@@ -118,64 +151,80 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                             SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
                         },
                     )
-
-
-
-
-
-
-
+
+                for i in range(kwargs.get('n',1)):
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                        [response_dict.get('choices')[i].get('finish_reason')])
+                    if trace_content:
+                        span.add_event(
+                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                            attributes={
+                                # pylint: disable=line-too-long
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+                            },
+                        )
+                    if kwargs.get('tools'):
+                        span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                            str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+                    if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                            'text')
+                    elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                            'json')

                 span.set_status(Status(StatusCode.OK))

                 if disable_metrics is False:
-                    attributes =
-
-
-                        SemanticConvetion.
-
-
-
-
-
-
-
-
-
-
-
-
-                    metrics[
-
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=response_dict.get('model'),
+                    )
+
+                    metrics['genai_client_usage_tokens'].record(
+                        input_tokens + output_tokens, attributes
+                    )
+                    metrics['genai_client_operation_duration'].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics['genai_server_ttft'].record(
+                        end_time - start_time, attributes
                     )
-                    metrics[
-                    metrics[
+                    metrics['genai_requests'].add(1, attributes)
+                    metrics['genai_completion_tokens'].add(output_tokens, attributes)
+                    metrics['genai_prompt_tokens'].add(input_tokens, attributes)
+                    metrics['genai_cost'].record(cost, attributes)

                 # Return original response
                 return response

             except Exception as e:
                 handle_exception(span, e)
-                logger.error(
+                logger.error('Error in trace creation: %s', e)

                 # Return original response
                 return response

     return wrapper

-def chat_stream(gen_ai_endpoint, version, environment, application_name,
+def chat_stream(version, environment, application_name,
                 tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat_stream to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the
+        application_name: Name of the application using the Mistral API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of
+        pricing_info: Information used for calculating the cost of Mistral usage.
         trace_content: Flag indicating whether to trace the actual content.

     Returns:
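The metrics branch records into a dict of pre-created OpenTelemetry instruments keyed by names such as 'genai_requests' and 'genai_cost'. A sketch of how such a dict could be built with the standard OpenTelemetry metrics API; only the keys used in this file are shown, and the instrument names, units, and descriptions are illustrative assumptions rather than openlit's exact definitions:

from opentelemetry import metrics as otel_metrics

def build_genai_metrics():
    # Obtain a meter and create one instrument per key the wrappers record into.
    meter = otel_metrics.get_meter('openlit.sketch')
    return {
        'genai_requests': meter.create_counter('gen_ai.total.requests'),
        'genai_prompt_tokens': meter.create_counter('gen_ai.usage.input_tokens'),
        'genai_completion_tokens': meter.create_counter('gen_ai.usage.output_tokens'),
        'genai_cost': meter.create_histogram('gen_ai.usage.cost', unit='USD'),
        'genai_client_usage_tokens': meter.create_histogram('gen_ai.client.token.usage'),
        'genai_client_operation_duration': meter.create_histogram('gen_ai.client.operation.duration', unit='s'),
        'genai_server_ttft': meter.create_histogram('gen_ai.server.time_to_first_token', unit='s'),
        'genai_server_tbt': meter.create_histogram('gen_ai.server.time_per_output_token', unit='s'),
    }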
@@ -199,142 +248,253 @@ def chat_stream(gen_ai_endpoint, version, environment, application_name,
             The response from the original 'chat_stream' method.
         """

-
-
-
+        class TracedSyncStream:
+            """
+            Wrapper for streaming responses to collect metrics and trace data.
+            Wraps the 'mistral.syncStream' response to collect message IDs and aggregated response.
+
+            This class implements the '__aiter__' and '__anext__' methods that
+            handle asynchronous streaming responses.
+
+            This class also implements '__aenter__' and '__aexit__' methods that
+            handle asynchronous context management protocol.
+            """
+            def __init__(
+                    self,
+                    wrapped,
+                    span,
+                    kwargs,
+                    server_address,
+                    server_port,
+                    **args,
+                ):
+                self.__wrapped__ = wrapped
+                self._span = span
                 # Placeholder for aggregating streaming response
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                self._llmresponse = ''
+                self._response_id = ''
+                self._response_model = ''
+                self._finish_reason = ''
+                self._input_tokens = ''
+                self._output_tokens = ''
+
+                self._args = args
+                self._kwargs = kwargs
+                self._start_time = time.time()
+                self._end_time = None
+                self._timestamps = []
+                self._ttft = 0
+                self._tbt = 0
+                self._server_address = server_address
+                self._server_port = server_port
+
+            def __enter__(self):
+                self.__wrapped__.__enter__()
+                return self
+
+            def __exit__(self, exc_type, exc_value, traceback):
+                self.__wrapped__.__exit__(exc_type, exc_value, traceback)
+
+            def __iter__(self):
+                return self
+
+            def __getattr__(self, name):
+                """Delegate attribute access to the wrapped object."""
+                return getattr(self.__wrapped__, name)
+
+            def __next__(self):
                 try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                    chunk = self.__wrapped__.__next__()
+                    end_time = time.time()
+                    # Record the timestamp for the current chunk
+                    self._timestamps.append(end_time)
+
+                    if len(self._timestamps) == 1:
+                        # Calculate time to first chunk
+                        self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
+                    chunked = response_as_dict(chunk)
+
+                    self._llmresponse += chunked.get('data').get('choices')[0].get('delta').get('content')
+                    if chunked.get('data').get('usage') is not None:
+                        self._response_id = chunked.get('data').get('id')
+                        self._response_model = chunked.get('data').get('model')
+                        self._input_tokens = chunked.get('data').get('usage').get('prompt_tokens')
+                        self._output_tokens = chunked.get('data').get('usage').get('completion_tokens')
+                        self._finish_reason = chunked.get('data').get('choices')[0].get('finish_reason')
+
+                    return chunk
+                except StopIteration:
+                    # Handling exception ensure observability without disrupting operation
+                    try:
+                        self._end_time = time.time()
+                        if len(self._timestamps) > 1:
+                            self._tbt = calculate_tbt(self._timestamps)
+
+                        # Format 'messages' into a single string
+                        message_prompt = self._kwargs.get('messages', '')
+                        formatted_messages = []
+                        for message in message_prompt:
+                            role = message['role']
+                            content = message['content']
+
+                            if isinstance(content, list):
+                                content_str_list = []
+                                for item in content:
+                                    if item['type'] == 'text':
+                                        content_str_list.append(f'text: {item["text"]}')
+                                    elif (item['type'] == 'image_url' and
+                                          not item['image_url']['url'].startswith('data:')):
+                                        content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+                                content_str = ", ".join(content_str_list)
+                                formatted_messages.append(f'{role}: {content_str}')
+                            else:
+                                formatted_messages.append(f'{role}: {content}')
+                        prompt = '\n'.join(formatted_messages)
+
+                        request_model = self._kwargs.get('model', 'mistral-small-latest')
+
+                        # Calculate cost of the operation
+                        cost = get_chat_model_cost(request_model,
+                                                    pricing_info, self._input_tokens,
+                                                    self._output_tokens)
+
+                        # Set Span attributes (OTel Semconv)
+                        self._span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                                    SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                                    SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                                    request_model)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                                    self._kwargs.get('seed', ''))
+                        self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                                    self._server_port)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                                    self._kwargs.get('frequency_penalty', 0.0))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                                    self._kwargs.get('max_tokens', -1))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                                    self._kwargs.get('presence_penalty', 0.0))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                                    self._kwargs.get('stop_sequences', []))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                                    self._kwargs.get('temperature', 0.3))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
+                                                    self._kwargs.get('k', 1.0))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                                    self._kwargs.get('p', 1.0))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                                    [self._finish_reason])
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                                    self._response_id)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                                    self._response_model)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                                    self._input_tokens)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                                    self._output_tokens)
+                        self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                                    self._server_address)
+
+                        if isinstance(self._llmresponse, str):
+                            self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                        'text')
                         else:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                            self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                        'json')
+
+                        # Set Span attributes (Extra)
+                        self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                                    environment)
+                        self._span.set_attribute(SERVICE_NAME,
+                                                    application_name)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                                    True)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                                    self._input_tokens + self._output_tokens)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                                    cost)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                                    self._tbt)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                                    self._ttft)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                                    version)
+                        if trace_content:
+                            self._span.add_event(
+                                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                                attributes={
+                                    SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                                },
+                            )
+                            self._span.add_event(
+                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                                attributes={
+                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
+                                },
+                            )
+                        self._span.set_status(Status(StatusCode.OK))
+
+                        if disable_metrics is False:
+                            attributes = create_metrics_attributes(
+                                service_name=application_name,
+                                deployment_environment=environment,
+                                operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                                system=SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
+                                request_model=request_model,
+                                server_address=self._server_address,
+                                server_port=self._server_port,
+                                response_model=self._response_model,
+                            )
+
+                            metrics['genai_client_usage_tokens'].record(
+                                self._input_tokens + self._output_tokens, attributes
+                            )
+                            metrics['genai_client_operation_duration'].record(
+                                self._end_time - self._start_time, attributes
+                            )
+                            metrics['genai_server_tbt'].record(
+                                self._tbt, attributes
+                            )
+                            metrics['genai_server_ttft'].record(
+                                self._ttft, attributes
+                            )
+                            metrics['genai_requests'].add(1, attributes)
+                            metrics['genai_completion_tokens'].add(self._output_tokens, attributes)
+                            metrics['genai_prompt_tokens'].add(self._input_tokens, attributes)
+                            metrics['genai_cost'].record(cost, attributes)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                        kwargs.get("model", "mistral-small-latest")
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(prompt_tokens + completion_tokens, attributes)
-                    metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                    metrics["genai_cost"].record(cost)
-
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
-
-        return stream_generator()
+                    except Exception as e:
+                        handle_exception(self._span, e)
+                        logger.error('Error in trace creation: %s', e)
+                    finally:
+                        self._span.end()
+                    raise
+
+        server_address, server_port = set_server_address_and_port(instance, 'api.mistral.ai', 443)
+        request_model = kwargs.get('model', 'mistral-small-latest')
+
+        span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}'
+
+        awaited_wrapped = wrapped(*args, **kwargs)
+        span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
+        return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)

     return wrapper

-def embeddings(gen_ai_endpoint, version, environment, application_name,
-
+def embeddings(version, environment, application_name,
+               tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for embeddings to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the
+        application_name: Name of the application using the Mistral API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of
+        pricing_info: Information used for calculating the cost of Mistral usage.
         trace_content: Flag indicating whether to trace the actual content.

     Returns:
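TracedSyncStream is the core of the new streaming path: it proxies the SDK's stream object, timestamps every chunk, and only finalizes the span and metrics when the underlying iterator raises StopIteration. A stripped-down, self-contained sketch of that pattern (the class name and the on_finish callback are illustrative, not openlit's API):

import time

class TimedStream:
    """Wrap a sync iterator, record chunk timestamps, and report once it is exhausted."""
    def __init__(self, wrapped, on_finish):
        self._wrapped = iter(wrapped)
        self._on_finish = on_finish        # called with (ttft_seconds, timestamps) at the end
        self._start_time = time.time()
        self._timestamps = []

    def __iter__(self):
        return self

    def __next__(self):
        try:
            chunk = next(self._wrapped)
            self._timestamps.append(time.time())
            return chunk
        except StopIteration:
            # Time to first chunk, computed from the first recorded timestamp
            ttft = self._timestamps[0] - self._start_time if self._timestamps else 0.0
            self._on_finish(ttft, self._timestamps)
            raise

# Usage: consume it exactly like the original stream; telemetry fires when it ends.
report = lambda ttft, ts: print(f'ttft={ttft:.6f}s chunks={len(ts)}')
for chunk in TimedStream(['Hel', 'lo'], report):
    pass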
@@ -358,78 +518,92 @@ def embeddings(gen_ai_endpoint, version, environment, application_name,
             The response from the original 'embeddings' method.
         """

-
+        server_address, server_port = set_server_address_and_port(instance, 'api.mistral.ai', 443)
+        request_model = kwargs.get('model', 'mistral-embed')
+
+        span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
+            end_time = time.time()

+            response_dict = response_as_dict(response)
             try:
-
-                prompt = ', '.join(kwargs.get('inputs', []))
+                input_tokens = response_dict.get('usage').get('prompt_tokens')

                 # Calculate cost of the operation
-                cost = get_embed_model_cost(
-
+                cost = get_embed_model_cost(request_model,
+                                            pricing_info, input_tokens)

-                # Set Span attributes
-                span.set_attribute(TELEMETRY_SDK_NAME,
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                    SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
+                # Set Span attributes (OTel Semconv)
+                span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
                 span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                     SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.
-
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                    application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                    SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-
+                                    request_model)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
-                                    kwargs.get(
-                span.set_attribute(SemanticConvetion.
-
+                                    [kwargs.get('encoding_format', 'float')])
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                    response_dict.get('model'))
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                    server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                    server_port)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-
+                                    input_tokens)
+
+                # Set Span attributes (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                    environment)
+                span.set_attribute(SERVICE_NAME,
+                                    application_name)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-
+                                    input_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                    version)
+
                 if trace_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                         attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT:
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: str(kwargs.get('inputs', '')),
                         },
                     )

                 span.set_status(Status(StatusCode.OK))

                 if disable_metrics is False:
-                    attributes =
-
-
-                        SemanticConvetion.
-
-
-
-
-
-
-
-
-
-
-
-
-                    metrics[
-                    metrics[
-                    metrics[
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=response_dict.get('model'),
+                    )
+                    metrics['genai_client_usage_tokens'].record(
+                        input_tokens, attributes
+                    )
+                    metrics['genai_client_operation_duration'].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics['genai_requests'].add(1, attributes)
+                    metrics['genai_prompt_tokens'].add(input_tokens, attributes)
+                    metrics['genai_cost'].record(cost, attributes)

                 # Return original response
                 return response

             except Exception as e:
                 handle_exception(span, e)
-                logger.error(
+                logger.error('Error in trace creation: %s', e)

                 # Return original response
                 return response