openlit-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,80 @@
+# pylint: disable=useless-return, bad-staticmethod-argument, duplicate-code
+"""Initializer of Auto Instrumentation of Mistral Functions"""
+from typing import Collection
+import importlib.metadata
+from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
+from wrapt import wrap_function_wrapper
+
+from openlit.instrumentation.mistral.mistral import chat, chat_stream, embeddings
+from openlit.instrumentation.mistral.async_mistral import async_chat, async_chat_stream
+from openlit.instrumentation.mistral.async_mistral import async_embeddings
+
+_instruments = ("mistralai >= 0.1.0",)
+
+class MistralInstrumentor(BaseInstrumentor):
+    """An instrumentor for Mistral's client library."""
+
+    def instrumentation_dependencies(self) -> Collection[str]:
+        return _instruments
+
+    def _instrument(self, **kwargs):
+        application_name = kwargs.get("application_name")
+        environment = kwargs.get("environment")
+        tracer = kwargs.get("tracer")
+        metrics = kwargs.get("metrics_dict")
+        pricing_info = kwargs.get("pricing_info")
+        trace_content = kwargs.get("trace_content")
+        disable_metrics = kwargs.get("disable_metrics")
+        version = importlib.metadata.version("mistralai")
+
+        # sync
+        wrap_function_wrapper(
+            "mistralai.client",
+            "MistralClient.chat",
+            chat("mistral.chat", version, environment, application_name,
+                 tracer, pricing_info, trace_content, metrics, disable_metrics),
+        )
+
+        # sync
+        wrap_function_wrapper(
+            "mistralai.client",
+            "MistralClient.chat_stream",
+            chat_stream("mistral.chat", version, environment, application_name,
+                        tracer, pricing_info, trace_content, metrics, disable_metrics),
+        )
+
+        # sync
+        wrap_function_wrapper(
+            "mistralai.client",
+            "MistralClient.embeddings",
+            embeddings("mistral.embeddings", version, environment, application_name,
+                       tracer, pricing_info, trace_content, metrics, disable_metrics),
+        )
+
+        # async
+        wrap_function_wrapper(
+            "mistralai.async_client",
+            "MistralAsyncClient.chat",
+            async_chat("mistral.chat", version, environment, application_name,
+                       tracer, pricing_info, trace_content, metrics, disable_metrics),
+        )
+
+        # async
+        wrap_function_wrapper(
+            "mistralai.async_client",
+            "MistralAsyncClient.chat_stream",
+            async_chat_stream("mistral.chat", version, environment, application_name,
+                              tracer, pricing_info, trace_content, metrics, disable_metrics),
+        )
+
+        # async
+        wrap_function_wrapper(
+            "mistralai.async_client",
+            "MistralAsyncClient.embeddings",
+            async_embeddings("mistral.embeddings", version, environment, application_name,
+                             tracer, pricing_info, trace_content, metrics, disable_metrics),
+        )
+
+    @staticmethod
+    def _uninstrument(self, **kwargs):
+        pass
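
The hunk above (by its imports, the package's openlit/instrumentation/mistral/__init__.py) registers a wrapper factory for each MistralClient and MistralAsyncClient method via wrapt. For orientation, here is a minimal sketch of driving that wiring through the standard BaseInstrumentor.instrument() entry point. The tracer setup uses stock OpenTelemetry APIs; the application name, environment, and the empty metrics_dict/pricing_info values are illustrative assumptions rather than openlit's own bootstrap code, which builds these values itself.

# Illustrative bootstrap only; not part of the packaged code. Assumes the
# mistralai package is installed so the dependency check passes.
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter

from openlit.instrumentation.mistral import MistralInstrumentor

# Standard OpenTelemetry tracer setup.
provider = TracerProvider()
provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(provider)
tracer = trace.get_tracer(__name__)

# BaseInstrumentor.instrument(**kwargs) forwards these kwargs to _instrument() above.
# metrics_dict and pricing_info are placeholders (assumption); disable_metrics=True
# keeps the wrappers from ever touching the empty metrics_dict.
MistralInstrumentor().instrument(
    application_name="demo-app",
    environment="development",
    tracer=tracer,
    metrics_dict={},
    pricing_info={},
    trace_content=True,
    disable_metrics=True,
)

After this call, every MistralClient.chat, chat_stream, and embeddings invocation (and the async equivalents) runs through the telemetry wrappers; the async ones are defined in the next hunk.
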
@@ -0,0 +1,417 @@
+# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument
+"""
+Module for monitoring Mistral API calls.
+"""
+
+import logging
+from opentelemetry.trace import SpanKind, Status, StatusCode
+from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from openlit.__helpers import get_chat_model_cost, get_embed_model_cost, handle_exception
+from openlit.semcov import SemanticConvetion
+
+# Initialize logger for logging potential issues and operations
+logger = logging.getLogger(__name__)
+
+def async_chat(gen_ai_endpoint, version, environment, application_name,
+               tracer, pricing_info, trace_content, metrics, disable_metrics):
+    """
+    Generates a telemetry wrapper for chat to collect metrics.
+
+    Args:
+        gen_ai_endpoint: Endpoint identifier for logging and tracing.
+        version: Version of the monitoring package.
+        environment: Deployment environment (e.g., production, staging).
+        application_name: Name of the application using the Mistral API.
+        tracer: OpenTelemetry tracer for creating spans.
+        pricing_info: Information used for calculating the cost of Mistral usage.
+        trace_content: Flag indicating whether to trace the actual content.
+
+    Returns:
+        A function that wraps the chat method to add telemetry.
+    """
+
+    async def wrapper(wrapped, instance, args, kwargs):
+        """
+        Wraps the 'chat' API call to add telemetry.
+
+        This collects metrics such as execution time, cost, and token usage, and handles errors
+        gracefully, adding details to the trace for observability.
+
+        Args:
+            wrapped: The original 'chat' method to be wrapped.
+            instance: The instance of the class where the original method is defined.
+            args: Positional arguments for the 'chat' method.
+            kwargs: Keyword arguments for the 'chat' method.
+
+        Returns:
+            The response from the original 'chat' method.
+        """
+
+        with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+            # Handle exceptions to ensure observability without disrupting the operation
+            response = await wrapped(*args, **kwargs)
+
+            try:
+                # Format 'messages' into a single string
+                message_prompt = kwargs.get('messages', "")
+                formatted_messages = []
+                for message in message_prompt:
+                    role = message.role
+                    content = message.content
+
+                    if isinstance(content, list):
+                        content_str = ", ".join(
+                            # pylint: disable=line-too-long
+                            f"{item['type']}: {item['text'] if 'text' in item else item['image_url']}"
+                            if 'type' in item else f"text: {item['text']}"
+                            for item in content
+                        )
+                        formatted_messages.append(f"{role}: {content_str}")
+                    else:
+                        formatted_messages.append(f"{role}: {content}")
+                prompt = " ".join(formatted_messages)
+
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(kwargs.get("model", "mistral-small-latest"),
+                                           pricing_info, response.usage.prompt_tokens,
+                                           response.usage.completion_tokens)
+
+                # Set Span attributes
+                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                   SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
+                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                   SemanticConvetion.GEN_AI_TYPE_CHAT)
+                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                   gen_ai_endpoint)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                   response.id)
+                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                   environment)
+                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                   application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   kwargs.get("model", "mistral-small-latest"))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                   kwargs.get("temperature", 0.7))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                   kwargs.get("top_p", 1))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                   kwargs.get("max_tokens", ""))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                   kwargs.get("random_seed", ""))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                   False)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                   response.choices[0].finish_reason)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                   response.usage.prompt_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                   response.usage.completion_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                   response.usage.total_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                   cost)
+                if trace_content:
+                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                       prompt)
+                    # pylint: disable=line-too-long
+                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION, response.choices[0].message.content if response.choices[0].message.content else "")
+
+                span.set_status(Status(StatusCode.OK))
+
+                if disable_metrics is False:
+                    attributes = {
+                        TELEMETRY_SDK_NAME:
+                            "openlit",
+                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                            application_name,
+                        SemanticConvetion.GEN_AI_SYSTEM:
+                            SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
+                        SemanticConvetion.GEN_AI_ENVIRONMENT:
+                            environment,
+                        SemanticConvetion.GEN_AI_TYPE:
+                            SemanticConvetion.GEN_AI_TYPE_CHAT,
+                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                            kwargs.get("model", "mistral-small-latest")
+                    }
+
+                    metrics["genai_requests"].add(1, attributes)
+                    metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
+                    metrics["genai_completion_tokens"].add(
+                        response.usage.completion_tokens, attributes
+                    )
+                    metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
+                    metrics["genai_cost"].record(cost, attributes)
+
+                # Return original response
+                return response
+
+            except Exception as e:
+                handle_exception(span, e)
+                logger.error("Error in trace creation: %s", e)
+
+                # Return original response
+                return response
+
+    return wrapper
+
+def async_chat_stream(gen_ai_endpoint, version, environment, application_name,
+                      tracer, pricing_info, trace_content, metrics, disable_metrics):
+    """
+    Generates a telemetry wrapper for chat_stream to collect metrics.
+
+    Args:
+        gen_ai_endpoint: Endpoint identifier for logging and tracing.
+        version: Version of the monitoring package.
+        environment: Deployment environment (e.g., production, staging).
+        application_name: Name of the application using the Mistral API.
+        tracer: OpenTelemetry tracer for creating spans.
+        pricing_info: Information used for calculating the cost of Mistral usage.
+        trace_content: Flag indicating whether to trace the actual content.
+
+    Returns:
+        A function that wraps the chat_stream method to add telemetry.
+    """
+
+    async def wrapper(wrapped, instance, args, kwargs):
+        """
+        Wraps the 'chat_stream' API call to add telemetry.
+
+        This collects metrics such as execution time, cost, and token usage, and handles errors
+        gracefully, adding details to the trace for observability.
+
+        Args:
+            wrapped: The original 'chat_stream' method to be wrapped.
+            instance: The instance of the class where the original method is defined.
+            args: Positional arguments for the 'chat_stream' method.
+            kwargs: Keyword arguments for the 'chat_stream' method.
+
+        Returns:
+            The response from the original 'chat_stream' method.
+        """
+
+        async def stream_generator():
+            # pylint: disable=line-too-long
+            with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+                # Placeholder for aggregating streaming response
+                llmresponse = ""
+
+                # Loop through streaming events capturing relevant details
+                async for event in wrapped(*args, **kwargs):
+                    response_id = event.id
+                    llmresponse += event.choices[0].delta.content
+                    if event.usage is not None:
+                        prompt_tokens = event.usage.prompt_tokens
+                        completion_tokens = event.usage.completion_tokens
+                        total_tokens = event.usage.total_tokens
+                        finish_reason = event.choices[0].finish_reason
+                    yield event
+
+                # Handle exceptions to ensure observability without disrupting the operation
+                try:
+                    # Format 'messages' into a single string
+                    message_prompt = kwargs.get('messages', "")
+                    formatted_messages = []
+                    for message in message_prompt:
+                        role = message.role
+                        content = message.content
+
+                        if isinstance(content, list):
+                            content_str = ", ".join(
+                                # pylint: disable=line-too-long
+                                f"{item['type']}: {item['text'] if 'text' in item else item['image_url']}"
+                                if 'type' in item else f"text: {item['text']}"
+                                for item in content
+                            )
+                            formatted_messages.append(f"{role}: {content_str}")
+                        else:
+                            formatted_messages.append(f"{role}: {content}")
+                    prompt = " ".join(formatted_messages)
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(kwargs.get("model", "mistral-small-latest"),
+                                               pricing_info, prompt_tokens, completion_tokens)
+
+                    # Set Span attributes
+                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                       SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
+                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                       SemanticConvetion.GEN_AI_TYPE_CHAT)
+                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                       gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                       response_id)
+                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                       environment)
+                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                       application_name)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       kwargs.get("model", "mistral-small-latest"))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                       kwargs.get("temperature", 0.7))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                       kwargs.get("top_p", 1))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                       kwargs.get("max_tokens", ""))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                       kwargs.get("random_seed", ""))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                       True)
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                       finish_reason)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                       prompt_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                       completion_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                       total_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                       cost)
+                    if trace_content:
+                        span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                           prompt)
+                        span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                           llmresponse)
+
+                    span.set_status(Status(StatusCode.OK))
+
+                    if disable_metrics is False:
+                        attributes = {
+                            TELEMETRY_SDK_NAME:
+                                "openlit",
+                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                application_name,
+                            SemanticConvetion.GEN_AI_SYSTEM:
+                                SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
+                            SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                environment,
+                            SemanticConvetion.GEN_AI_TYPE:
+                                SemanticConvetion.GEN_AI_TYPE_CHAT,
+                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                kwargs.get("model", "mistral-small-latest")
+                        }
+
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_total_tokens"].add(prompt_tokens + completion_tokens, attributes)
+                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                        metrics["genai_cost"].record(cost, attributes)
+
+                except Exception as e:
+                    handle_exception(span, e)
+                    logger.error("Error in trace creation: %s", e)
+
+        return stream_generator()
+
+    return wrapper
+
+def async_embeddings(gen_ai_endpoint, version, environment, application_name,
+                     tracer, pricing_info, trace_content, metrics, disable_metrics):
+    """
+    Generates a telemetry wrapper for embeddings to collect metrics.
+
+    Args:
+        gen_ai_endpoint: Endpoint identifier for logging and tracing.
+        version: Version of the monitoring package.
+        environment: Deployment environment (e.g., production, staging).
+        application_name: Name of the application using the Mistral API.
+        tracer: OpenTelemetry tracer for creating spans.
+        pricing_info: Information used for calculating the cost of Mistral usage.
+        trace_content: Flag indicating whether to trace the actual content.
+
+    Returns:
+        A function that wraps the embeddings method to add telemetry.
+    """
+
+    async def wrapper(wrapped, instance, args, kwargs):
+        """
+        Wraps the 'embeddings' API call to add telemetry.
+
+        This collects metrics such as execution time, cost, and token usage, and handles errors
+        gracefully, adding details to the trace for observability.
+
+        Args:
+            wrapped: The original 'embeddings' method to be wrapped.
+            instance: The instance of the class where the original method is defined.
+            args: Positional arguments for the 'embeddings' method.
+            kwargs: Keyword arguments for the 'embeddings' method.
+
+        Returns:
+            The response from the original 'embeddings' method.
+        """
+
+        with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+            response = await wrapped(*args, **kwargs)
+
+            try:
+                # Get prompt from kwargs and store as a single string
+                prompt = ', '.join(kwargs.get('input', []))
+
+                # Calculate cost of the operation
+                cost = get_embed_model_cost(kwargs.get('model', "mistral-embed"),
+                                            pricing_info, response.usage.prompt_tokens)
+
+                # Set Span attributes
+                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                   SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
+                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                   SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
+                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                   gen_ai_endpoint)
+                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                   environment)
+                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                   application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   kwargs.get('model', "mistral-embed"))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
+                                   kwargs.get("encoding_format", "float"))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                   response.id)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                   response.usage.prompt_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                   response.usage.total_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                   cost)
+                if trace_content:
+                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                       prompt)
+
+                span.set_status(Status(StatusCode.OK))
+
+                if disable_metrics is False:
+                    attributes = {
+                        TELEMETRY_SDK_NAME:
+                            "openlit",
+                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                            application_name,
+                        SemanticConvetion.GEN_AI_SYSTEM:
+                            SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
+                        SemanticConvetion.GEN_AI_ENVIRONMENT:
+                            environment,
+                        SemanticConvetion.GEN_AI_TYPE:
+                            SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
+                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                            kwargs.get('model', "mistral-embed")
+                    }
+
+                    metrics["genai_requests"].add(1, attributes)
+                    metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
+                    metrics["genai_cost"].record(cost, attributes)
+
+                # Return original response
+                return response
+
+            except Exception as e:
+                handle_exception(span, e)
+                logger.error("Error in trace creation: %s", e)
+
+                # Return original response
+                return response
+
+    return wrapper
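
The three async wrappers above (and their sync counterparts imported in __init__.py) record metrics by indexing the metrics dict with fixed keys — genai_requests, genai_prompt_tokens, genai_completion_tokens, genai_total_tokens, genai_cost — and calling .add() or .record() on the stored instruments, so the dict is expected to hold OpenTelemetry counters plus one histogram under those names. A minimal sketch of building such a dict with the stock metrics API follows; the meter name, instrument names, and descriptions are illustrative assumptions, not openlit's actual definitions.

# Illustrative only; openlit constructs its own metrics dict internally.
from opentelemetry import metrics as otel_metrics

meter = otel_metrics.get_meter("demo.mistral.instrumentation")

metrics_dict = {
    # Counters receive .add(value, attributes) from the wrappers above.
    "genai_requests": meter.create_counter(
        "genai.total.requests", description="Number of GenAI requests"),
    "genai_prompt_tokens": meter.create_counter(
        "genai.usage.prompt_tokens", description="Prompt tokens consumed"),
    "genai_completion_tokens": meter.create_counter(
        "genai.usage.completion_tokens", description="Completion tokens generated"),
    "genai_total_tokens": meter.create_counter(
        "genai.usage.total_tokens", description="Total tokens processed"),
    # The cost histogram receives .record(value, attributes).
    "genai_cost": meter.create_histogram(
        "genai.usage.cost", description="Estimated request cost in USD"),
}

Passed as metrics_dict (with disable_metrics left False), this gives each traced chat, chat_stream, and embeddings call a request count, token usage, and estimated cost alongside its span.
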