openlit 1.33.9 (py3-none-any.whl) → 1.33.11 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +78 -0
- openlit/__init__.py +41 -13
- openlit/instrumentation/ag2/__init__.py +9 -10
- openlit/instrumentation/ag2/ag2.py +134 -69
- openlit/instrumentation/ai21/__init__.py +6 -5
- openlit/instrumentation/ai21/ai21.py +71 -534
- openlit/instrumentation/ai21/async_ai21.py +71 -534
- openlit/instrumentation/ai21/utils.py +407 -0
- openlit/instrumentation/anthropic/__init__.py +3 -3
- openlit/instrumentation/anthropic/anthropic.py +5 -5
- openlit/instrumentation/anthropic/async_anthropic.py +5 -5
- openlit/instrumentation/assemblyai/__init__.py +2 -2
- openlit/instrumentation/assemblyai/assemblyai.py +3 -3
- openlit/instrumentation/astra/__init__.py +25 -25
- openlit/instrumentation/astra/astra.py +7 -7
- openlit/instrumentation/astra/async_astra.py +7 -7
- openlit/instrumentation/azure_ai_inference/__init__.py +5 -5
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +11 -11
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +11 -11
- openlit/instrumentation/bedrock/__init__.py +2 -2
- openlit/instrumentation/bedrock/bedrock.py +3 -3
- openlit/instrumentation/chroma/__init__.py +9 -9
- openlit/instrumentation/chroma/chroma.py +7 -7
- openlit/instrumentation/cohere/__init__.py +7 -7
- openlit/instrumentation/cohere/async_cohere.py +10 -10
- openlit/instrumentation/cohere/cohere.py +11 -11
- openlit/instrumentation/controlflow/__init__.py +4 -4
- openlit/instrumentation/controlflow/controlflow.py +5 -5
- openlit/instrumentation/crawl4ai/__init__.py +3 -3
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
- openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
- openlit/instrumentation/crewai/__init__.py +3 -3
- openlit/instrumentation/crewai/crewai.py +6 -4
- openlit/instrumentation/dynamiq/__init__.py +5 -5
- openlit/instrumentation/dynamiq/dynamiq.py +5 -5
- openlit/instrumentation/elevenlabs/__init__.py +5 -5
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +4 -5
- openlit/instrumentation/elevenlabs/elevenlabs.py +4 -5
- openlit/instrumentation/embedchain/__init__.py +2 -2
- openlit/instrumentation/embedchain/embedchain.py +9 -9
- openlit/instrumentation/firecrawl/__init__.py +3 -3
- openlit/instrumentation/firecrawl/firecrawl.py +5 -5
- openlit/instrumentation/google_ai_studio/__init__.py +3 -3
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +3 -3
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +3 -3
- openlit/instrumentation/gpt4all/__init__.py +5 -5
- openlit/instrumentation/gpt4all/gpt4all.py +350 -225
- openlit/instrumentation/gpu/__init__.py +5 -5
- openlit/instrumentation/groq/__init__.py +5 -5
- openlit/instrumentation/groq/async_groq.py +359 -243
- openlit/instrumentation/groq/groq.py +359 -243
- openlit/instrumentation/haystack/__init__.py +2 -2
- openlit/instrumentation/haystack/haystack.py +5 -5
- openlit/instrumentation/julep/__init__.py +7 -7
- openlit/instrumentation/julep/async_julep.py +6 -6
- openlit/instrumentation/julep/julep.py +6 -6
- openlit/instrumentation/langchain/__init__.py +15 -9
- openlit/instrumentation/langchain/async_langchain.py +388 -0
- openlit/instrumentation/langchain/langchain.py +110 -497
- openlit/instrumentation/letta/__init__.py +7 -7
- openlit/instrumentation/letta/letta.py +10 -8
- openlit/instrumentation/litellm/__init__.py +9 -10
- openlit/instrumentation/litellm/async_litellm.py +321 -250
- openlit/instrumentation/litellm/litellm.py +319 -248
- openlit/instrumentation/llamaindex/__init__.py +2 -2
- openlit/instrumentation/llamaindex/llamaindex.py +5 -5
- openlit/instrumentation/mem0/__init__.py +2 -2
- openlit/instrumentation/mem0/mem0.py +5 -5
- openlit/instrumentation/milvus/__init__.py +2 -2
- openlit/instrumentation/milvus/milvus.py +7 -7
- openlit/instrumentation/mistral/__init__.py +13 -13
- openlit/instrumentation/mistral/async_mistral.py +426 -253
- openlit/instrumentation/mistral/mistral.py +424 -250
- openlit/instrumentation/multion/__init__.py +7 -7
- openlit/instrumentation/multion/async_multion.py +9 -7
- openlit/instrumentation/multion/multion.py +9 -7
- openlit/instrumentation/ollama/__init__.py +19 -39
- openlit/instrumentation/ollama/async_ollama.py +137 -563
- openlit/instrumentation/ollama/ollama.py +136 -563
- openlit/instrumentation/ollama/utils.py +333 -0
- openlit/instrumentation/openai/__init__.py +11 -11
- openlit/instrumentation/openai/async_openai.py +25 -27
- openlit/instrumentation/openai/openai.py +25 -27
- openlit/instrumentation/phidata/__init__.py +2 -2
- openlit/instrumentation/phidata/phidata.py +6 -4
- openlit/instrumentation/pinecone/__init__.py +6 -6
- openlit/instrumentation/pinecone/pinecone.py +7 -7
- openlit/instrumentation/premai/__init__.py +5 -5
- openlit/instrumentation/premai/premai.py +268 -219
- openlit/instrumentation/qdrant/__init__.py +2 -2
- openlit/instrumentation/qdrant/async_qdrant.py +7 -7
- openlit/instrumentation/qdrant/qdrant.py +7 -7
- openlit/instrumentation/reka/__init__.py +5 -5
- openlit/instrumentation/reka/async_reka.py +93 -55
- openlit/instrumentation/reka/reka.py +93 -55
- openlit/instrumentation/together/__init__.py +9 -9
- openlit/instrumentation/together/async_together.py +284 -242
- openlit/instrumentation/together/together.py +284 -242
- openlit/instrumentation/transformers/__init__.py +3 -3
- openlit/instrumentation/transformers/transformers.py +79 -48
- openlit/instrumentation/vertexai/__init__.py +19 -69
- openlit/instrumentation/vertexai/async_vertexai.py +333 -990
- openlit/instrumentation/vertexai/vertexai.py +333 -990
- openlit/instrumentation/vllm/__init__.py +3 -3
- openlit/instrumentation/vllm/vllm.py +65 -35
- openlit/otel/events.py +85 -0
- openlit/otel/tracing.py +3 -13
- openlit/semcov/__init__.py +16 -4
- {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/METADATA +2 -2
- openlit-1.33.11.dist-info/RECORD +125 -0
- openlit-1.33.9.dist-info/RECORD +0 -121
- {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/LICENSE +0 -0
- {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/WHEEL +0 -0
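The detailed hunks below are from openlit/instrumentation/mistral/async_mistral.py: the chat, chat-stream, and embeddings wrappers drop the per-endpoint gen_ai_endpoint argument, record server address/port, request parameters, and token usage as span attributes under the OpenTelemetry GenAI semantic conventions, and add TTFT/TBT timing plus per-request client metrics. These wrappers are installed when OpenLIT is initialized in the application; a minimal setup sketch follows (assuming the openlit.init() entry point — keyword names other than otlp_endpoint, application_name, environment, and disable_metrics may differ between releases, and prompt/completion capture is governed by the capture_message_content flag threaded through the wrappers shown below):

    # Minimal setup sketch, not part of this diff; parameter names beyond the
    # four used here are assumptions about the openlit.init() signature.
    import openlit

    openlit.init(
        application_name="chatbot-backend",    # recorded as service.name on spans
        environment="production",              # recorded as deployment.environment
        otlp_endpoint="http://127.0.0.1:4318", # OTLP HTTP collector endpoint
        disable_metrics=False,                 # keep the genai_* client metrics enabled
    )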
openlit/instrumentation/mistral/async_mistral.py
@@ -1,30 +1,38 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
 """
 Module for monitoring Mistral API calls.
 """

 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
-from openlit.__helpers import
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from openlit.__helpers import (
+    get_chat_model_cost,
+    get_embed_model_cost,
+    handle_exception,
+    response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
+)
 from openlit.semcov import SemanticConvetion

 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)

-def async_chat(gen_ai_endpoint, version, environment, application_name,
- …
+def async_chat(version, environment, application_name, tracer,
+               pricing_info, capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the
+        application_name: Name of the application using the Mistral API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of
- …
+        pricing_info: Information used for calculating the cost of Mistral usage.
+        capture_message_content: Flag indicating whether to trace the actual content.

     Returns:
         A function that wraps the chat method to add telemetry.
@@ -47,137 +55,177 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
         The response from the original 'chat' method.
         """

- …
+        server_address, server_port = set_server_address_and_port(instance, 'api.mistral.ai', 443)
+        request_model = kwargs.get('model', 'mistral-small-latest')
+
+        span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}'
+
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = await wrapped(*args, **kwargs)
+            end_time = time.time()
+
+            response_dict = response_as_dict(response)

             try:
                 # Format 'messages' into a single string
-                message_prompt = kwargs.get('messages',
+                message_prompt = kwargs.get('messages', '')
                 formatted_messages = []
                 for message in message_prompt:
-                    role = message[
-                    content = message[
+                    role = message['role']
+                    content = message['content']

                     if isinstance(content, list):
                         content_str = ", ".join(
- …
-                            if 'type' in item else f"text: {item['text']}"
+                            f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                            if "type" in item else f'text: {item["text"]}'
                             for item in content
                         )
-                        formatted_messages.append(f
+                        formatted_messages.append(f'{role}: {content_str}')
                     else:
-                        formatted_messages.append(f
-                prompt =
+                        formatted_messages.append(f'{role}: {content}')
+                prompt = '\n'.join(formatted_messages)
+
+                input_tokens = response_dict.get('usage').get('prompt_tokens')
+                output_tokens = response_dict.get('usage').get('completion_tokens')

                 # Calculate cost of the operation
-                cost = get_chat_model_cost(
-                    pricing_info,
- …
+                cost = get_chat_model_cost(request_model,
+                    pricing_info, input_tokens,
+                    output_tokens)

-                # Set
-                span.set_attribute(TELEMETRY_SDK_NAME,
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                    SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
+                # Set base span attribues (OTel Semconv)
+                span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
                 span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                     SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                span.set_attribute(SemanticConvetion.
- …
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                    response.id)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                    application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                    SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- …
+                    request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                    kwargs.get('seed', ''))
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                    server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                    kwargs.get('frequency_penalty', 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                    kwargs.get('max_tokens', -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                    kwargs.get('presence_penalty', 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                    kwargs.get('stop', []))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                    kwargs.get(
+                    kwargs.get('temperature', 1.0))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                    kwargs.get(
-                span.set_attribute(SemanticConvetion.
- …
-                span.set_attribute(SemanticConvetion.
- …
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                    False)
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                    [response.choices[0].finish_reason])
+                    kwargs.get('top_p', 1.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                    response_dict.get('id'))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                    response_dict.get('model'))
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
- …
+                    input_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
- …
+                    output_tokens)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                    server_address)
+
+                # Set base span attribues (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                    environment)
+                span.set_attribute(SERVICE_NAME,
+                    application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                    False)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- …
+                    input_tokens + output_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                     cost)
- …
+                span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                    end_time - start_time)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                    version)
+                if capture_message_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                         attributes={
                             SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
                         },
                     )
- …
+
+                for i in range(kwargs.get('n',1)):
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                        [response_dict.get('choices')[i].get('finish_reason')])
+                    if capture_message_content:
+                        span.add_event(
+                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                            attributes={
+                                # pylint: disable=line-too-long
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+                            },
+                        )
+                    if kwargs.get('tools'):
+                        span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                            str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+                    if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                            'text')
+                    elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                            'json')

                 span.set_status(Status(StatusCode.OK))

                 if disable_metrics is False:
-                    attributes =
- …
-                        SemanticConvetion.
- …
-                        SemanticConvetion.GEN_AI_OPERATION:
-                            SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "mistral-small-latest")
-                    }
- …
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
-                    metrics["genai_completion_tokens"].add(
-                        response.usage.completion_tokens, attributes
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=response_dict.get('model'),
                     )
- …
-                    metrics[
+
+                    metrics['genai_client_usage_tokens'].record(
+                        input_tokens + output_tokens, attributes
+                    )
+                    metrics['genai_client_operation_duration'].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics['genai_server_ttft'].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics['genai_requests'].add(1, attributes)
+                    metrics['genai_completion_tokens'].add(output_tokens, attributes)
+                    metrics['genai_prompt_tokens'].add(input_tokens, attributes)
+                    metrics['genai_cost'].record(cost, attributes)

                 # Return original response
                 return response

             except Exception as e:
                 handle_exception(span, e)
-                logger.error(
+                logger.error('Error in trace creation: %s', e)

                 # Return original response
                 return response

     return wrapper

-def async_chat_stream(gen_ai_endpoint, version, environment, application_name,
- …
+def async_chat_stream(version, environment, application_name,
+                      tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat_stream to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the
+        application_name: Name of the application using the Mistral API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of
- …
+        pricing_info: Information used for calculating the cost of Mistral usage.
+        capture_message_content: Flag indicating whether to trace the actual content.

     Returns:
         A function that wraps the chat method to add telemetry.
@@ -200,149 +248,260 @@ def async_chat_stream(gen_ai_endpoint, version, environment, application_name,
             The response from the original 'chat_stream' method.
             """

- …
+        class TracedAsyncStream:
+            """
+            Wrapper for streaming responses to collect metrics and trace data.
+            Wraps the 'mistral.syncStream' response to collect message IDs and aggregated response.
+
+            This class implements the '__aiter__' and '__anext__' methods that
+            handle asynchronous streaming responses.
+
+            This class also implements '__aenter__' and '__aexit__' methods that
+            handle asynchronous context management protocol.
+            """
+            def __init__(
+                    self,
+                    wrapped,
+                    span,
+                    kwargs,
+                    server_address,
+                    server_port,
+                    **args,
+                ):
+                self.__wrapped__ = wrapped
+                self._span = span
                 # Placeholder for aggregating streaming response
- …
+                self._llmresponse = ''
+                self._response_id = ''
+                self._response_model = ''
+                self._finish_reason = ''
+                self._input_tokens = ''
+                self._output_tokens = ''
+
+                self._args = args
+                self._kwargs = kwargs
+                self._start_time = time.time()
+                self._end_time = None
+                self._timestamps = []
+                self._ttft = 0
+                self._tbt = 0
+                self._server_address = server_address
+                self._server_port = server_port
+
+            async def __aenter__(self):
+                await self.__wrapped__.__aenter__()
+                return self
+
+            async def __aexit__(self, exc_type, exc_value, traceback):
+                await self.__wrapped__.__aexit__(exc_type, exc_value, traceback)
+
+            def __aiter__(self):
+                return self
+
+            async def __getattr__(self, name):
+                """Delegate attribute access to the wrapped object."""
+                return getattr(await self.__wrapped__, name)
+
+            async def __anext__(self):
                 try:
- …
+                    chunk = await self.__wrapped__.__anext__()
+                    end_time = time.time()
+                    # Record the timestamp for the current chunk
+                    self._timestamps.append(end_time)
+
+                    if len(self._timestamps) == 1:
+                        # Calculate time to first chunk
+                        self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
+                    chunked = response_as_dict(chunk)
+
+                    self._llmresponse += chunked.get('data').get('choices')[0].get('delta').get('content')
+                    if chunked.get('data').get('usage') is not None:
+                        self._response_id = chunked.get('data').get('id')
+                        self._response_model = chunked.get('data').get('model')
+                        self._input_tokens = chunked.get('data').get('usage').get('prompt_tokens')
+                        self._output_tokens = chunked.get('data').get('usage').get('completion_tokens')
+                        self._finish_reason = chunked.get('data').get('choices')[0].get('finish_reason')
+
+                    return chunk
+                except StopAsyncIteration:
+                    # Handling exception ensure observability without disrupting operation
+                    try:
+                        self._end_time = time.time()
+                        if len(self._timestamps) > 1:
+                            self._tbt = calculate_tbt(self._timestamps)
+
+                        # Format 'messages' into a single string
+                        message_prompt = self._kwargs.get('messages', '')
+                        formatted_messages = []
+                        for message in message_prompt:
+                            role = message['role']
+                            content = message['content']
+
+                            if isinstance(content, list):
+                                content_str_list = []
+                                for item in content:
+                                    if item['type'] == 'text':
+                                        content_str_list.append(f'text: {item["text"]}')
+                                    elif (item['type'] == 'image_url' and
+                                          not item['image_url']['url'].startswith('data:')):
+                                        content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+                                content_str = ", ".join(content_str_list)
+                                formatted_messages.append(f'{role}: {content_str}')
+                            else:
+                                formatted_messages.append(f'{role}: {content}')
+                        prompt = '\n'.join(formatted_messages)
+
+                        request_model = self._kwargs.get('model', 'mistral-small-latest')
+
+                        # Calculate cost of the operation
+                        cost = get_chat_model_cost(request_model,
+                            pricing_info, self._input_tokens,
+                            self._output_tokens)
+
+                        # Set Span attributes (OTel Semconv)
+                        self._span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                            SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                            SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                            request_model)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                            self._kwargs.get('seed', ''))
+                        self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                            self._server_port)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                            self._kwargs.get('frequency_penalty', 0.0))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                            self._kwargs.get('max_tokens', -1))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                            self._kwargs.get('presence_penalty', 0.0))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                            self._kwargs.get('stop_sequences', []))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                            self._kwargs.get('temperature', 0.3))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
+                            self._kwargs.get('k', 1.0))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                            self._kwargs.get('p', 1.0))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                            [self._finish_reason])
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                            self._response_id)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                            self._response_model)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                            self._input_tokens)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                            self._output_tokens)
+                        self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                            self._server_address)
+
+                        if isinstance(self._llmresponse, str):
+                            self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                'text')
                         else:
- …
+                            self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                'json')
+
+                        # Set Span attributes (Extra)
+                        self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                            environment)
+                        self._span.set_attribute(SERVICE_NAME,
+                            application_name)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                            True)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                            self._input_tokens + self._output_tokens)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                            cost)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                            self._tbt)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                            self._ttft)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                            version)
+                        if capture_message_content:
+                            self._span.add_event(
+                                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                                attributes={
+                                    SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                                },
+                            )
+                            self._span.add_event(
+                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                                attributes={
+                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
+                                },
+                            )
+                        self._span.set_status(Status(StatusCode.OK))
+
+                        if disable_metrics is False:
+                            attributes = create_metrics_attributes(
+                                service_name=application_name,
+                                deployment_environment=environment,
+                                operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                                system=SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
+                                request_model=request_model,
+                                server_address=self._server_address,
+                                server_port=self._server_port,
+                                response_model=self._response_model,
+                            )
+
+                            metrics['genai_client_usage_tokens'].record(
+                                self._input_tokens + self._output_tokens, attributes
+                            )
+                            metrics['genai_client_operation_duration'].record(
+                                self._end_time - self._start_time, attributes
+                            )
+                            metrics['genai_server_tbt'].record(
+                                self._tbt, attributes
+                            )
+                            metrics['genai_server_ttft'].record(
+                                self._ttft, attributes
+                            )
+                            metrics['genai_requests'].add(1, attributes)
+                            metrics['genai_completion_tokens'].add(self._output_tokens, attributes)
+                            metrics['genai_prompt_tokens'].add(self._input_tokens, attributes)
+                            metrics['genai_cost'].record(cost, attributes)
+
+                    except Exception as e:
+                        handle_exception(self._span, e)
+                        logger.error('Error in trace creation: %s', e)
+                    finally:
+                        self._span.end()
+                    raise
+
+        server_address, server_port = set_server_address_and_port(instance, 'api.mistral.ai', 443)
+        request_model = kwargs.get('model', 'mistral-small-latest')

- …
-                        "openlit",
-                    SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                        application_name,
-                    SemanticConvetion.GEN_AI_SYSTEM:
-                        SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
-                    SemanticConvetion.GEN_AI_ENVIRONMENT:
-                        environment,
-                    SemanticConvetion.GEN_AI_OPERATION:
-                        SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                    SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                        kwargs.get("model", "mistral-small-latest")
-                }
- …
-                metrics["genai_requests"].add(1, attributes)
-                metrics["genai_total_tokens"].add(prompt_tokens + completion_tokens, attributes)
-                metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                metrics["genai_cost"].record(cost)
- …
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
- …
-        return stream_generator()
+        span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}'
+
+        awaited_wrapped = await wrapped(*args, **kwargs)
+        span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
+        return TracedAsyncStream(awaited_wrapped, span, kwargs, server_address, server_port)

     return wrapper

-def async_embeddings(gen_ai_endpoint, version, environment, application_name,
- …
+def async_embeddings(version, environment, application_name,
+                     tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for embeddings to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the
+        application_name: Name of the application using the Mistral API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of
- …
+        pricing_info: Information used for calculating the cost of Mistral usage.
+        capture_message_content: Flag indicating whether to trace the actual content.

     Returns:
         A function that wraps the embeddings method to add telemetry.
     """

- …
+    def wrapper(wrapped, instance, args, kwargs):
         """
         Wraps the 'embeddings' API call to add telemetry.

@@ -359,78 +518,92 @@ def async_embeddings(gen_ai_endpoint, version, environment, application_name,
         The response from the original 'embeddings' method.
         """

- …
+        server_address, server_port = set_server_address_and_port(instance, 'api.mistral.ai', 443)
+        request_model = kwargs.get('model', 'mistral-embed')
+
+        span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
+            response = wrapped(*args, **kwargs)
+            end_time = time.time()

+            response_dict = response_as_dict(response)
             try:
- …
-                prompt = ', '.join(kwargs.get('inputs', []))
+                input_tokens = response_dict.get('usage').get('prompt_tokens')

                 # Calculate cost of the operation
-                cost = get_embed_model_cost(
- …
+                cost = get_embed_model_cost(request_model,
+                    pricing_info, input_tokens)

-                # Set Span attributes
-                span.set_attribute(TELEMETRY_SDK_NAME,
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                    SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
+                # Set Span attributes (OTel Semconv)
+                span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
                 span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                     SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.
- …
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                    application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                    SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- …
+                    request_model)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
-                    kwargs.get(
-                span.set_attribute(SemanticConvetion.
- …
+                    [kwargs.get('encoding_format', 'float')])
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                    response_dict.get('model'))
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                    server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                    server_port)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
- …
+                    input_tokens)
+
+                # Set Span attributes (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                    environment)
+                span.set_attribute(SERVICE_NAME,
+                    application_name)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- …
+                    input_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                     cost)
- …
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                    version)
+
+                if capture_message_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                         attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT:
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: str(kwargs.get('inputs', '')),
                         },
                     )

                 span.set_status(Status(StatusCode.OK))

                 if disable_metrics is False:
-                    attributes =
- …
-                        SemanticConvetion.
- …
-                    metrics[
-                    metrics[
-                    metrics[
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=response_dict.get('model'),
+                    )
+                    metrics['genai_client_usage_tokens'].record(
+                        input_tokens, attributes
+                    )
+                    metrics['genai_client_operation_duration'].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics['genai_requests'].add(1, attributes)
+                    metrics['genai_prompt_tokens'].add(input_tokens, attributes)
+                    metrics['genai_cost'].record(cost, attributes)

                 # Return original response
                 return response

             except Exception as e:
                 handle_exception(span, e)
-                logger.error(
+                logger.error('Error in trace creation: %s', e)

                 # Return original response
                 return response
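For orientation, a usage sketch of the call paths these wrappers intercept; it assumes the mistralai v1 async client (Mistral, chat.complete_async, chat.stream_async), which is an assumption about the instrumented SDK rather than part of this diff. The streaming wrapper returns a TracedAsyncStream that is consumed exactly like the underlying stream; its span is ended and metrics are recorded once the stream is exhausted.

    # Hypothetical consumer of the instrumented client — the mistralai v1 names
    # used here (Mistral, complete_async, stream_async) are assumptions.
    import asyncio
    from mistralai import Mistral

    async def main():
        client = Mistral(api_key="...")  # patched transparently once openlit.init() has run

        # Non-streaming chat: async_chat wraps the awaited call and records
        # token usage, cost, and TTFT on a single client span.
        response = await client.chat.complete_async(
            model="mistral-small-latest",
            messages=[{"role": "user", "content": "Say hello"}],
        )
        print(response.choices[0].message.content)

        # Streaming chat: async_chat_stream returns a TracedAsyncStream; chunks
        # pass through unchanged and the span ends when iteration stops.
        stream = await client.chat.stream_async(
            model="mistral-small-latest",
            messages=[{"role": "user", "content": "Say hello"}],
        )
        async for chunk in stream:
            delta = chunk.data.choices[0].delta.content
            if delta:
                print(delta, end="")

    asyncio.run(main())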