openlit 1.33.9__py3-none-any.whl → 1.33.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. openlit/__helpers.py +5 -0
  2. openlit/__init__.py +3 -2
  3. openlit/instrumentation/ag2/ag2.py +3 -3
  4. openlit/instrumentation/ai21/ai21.py +1 -1
  5. openlit/instrumentation/ai21/async_ai21.py +1 -1
  6. openlit/instrumentation/anthropic/anthropic.py +1 -1
  7. openlit/instrumentation/anthropic/async_anthropic.py +1 -1
  8. openlit/instrumentation/astra/astra.py +5 -5
  9. openlit/instrumentation/astra/async_astra.py +5 -5
  10. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +3 -3
  11. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +3 -3
  12. openlit/instrumentation/chroma/chroma.py +5 -5
  13. openlit/instrumentation/cohere/async_cohere.py +1 -1
  14. openlit/instrumentation/cohere/cohere.py +2 -2
  15. openlit/instrumentation/controlflow/controlflow.py +3 -3
  16. openlit/instrumentation/crawl4ai/async_crawl4ai.py +3 -3
  17. openlit/instrumentation/crawl4ai/crawl4ai.py +3 -3
  18. openlit/instrumentation/crewai/crewai.py +4 -2
  19. openlit/instrumentation/dynamiq/dynamiq.py +3 -3
  20. openlit/instrumentation/elevenlabs/async_elevenlabs.py +1 -2
  21. openlit/instrumentation/elevenlabs/elevenlabs.py +1 -2
  22. openlit/instrumentation/embedchain/embedchain.py +5 -5
  23. openlit/instrumentation/firecrawl/firecrawl.py +3 -3
  24. openlit/instrumentation/gpt4all/__init__.py +2 -2
  25. openlit/instrumentation/gpt4all/gpt4all.py +345 -220
  26. openlit/instrumentation/gpu/__init__.py +5 -5
  27. openlit/instrumentation/groq/__init__.py +2 -2
  28. openlit/instrumentation/groq/async_groq.py +356 -240
  29. openlit/instrumentation/groq/groq.py +356 -240
  30. openlit/instrumentation/haystack/haystack.py +3 -3
  31. openlit/instrumentation/julep/async_julep.py +3 -3
  32. openlit/instrumentation/julep/julep.py +3 -3
  33. openlit/instrumentation/langchain/__init__.py +13 -7
  34. openlit/instrumentation/langchain/async_langchain.py +384 -0
  35. openlit/instrumentation/langchain/langchain.py +98 -490
  36. openlit/instrumentation/letta/letta.py +5 -3
  37. openlit/instrumentation/litellm/__init__.py +4 -5
  38. openlit/instrumentation/litellm/async_litellm.py +316 -245
  39. openlit/instrumentation/litellm/litellm.py +312 -241
  40. openlit/instrumentation/llamaindex/llamaindex.py +3 -3
  41. openlit/instrumentation/mem0/mem0.py +3 -3
  42. openlit/instrumentation/milvus/milvus.py +5 -5
  43. openlit/instrumentation/mistral/__init__.py +6 -6
  44. openlit/instrumentation/mistral/async_mistral.py +421 -248
  45. openlit/instrumentation/mistral/mistral.py +418 -244
  46. openlit/instrumentation/multion/async_multion.py +4 -2
  47. openlit/instrumentation/multion/multion.py +4 -2
  48. openlit/instrumentation/ollama/__init__.py +8 -30
  49. openlit/instrumentation/ollama/async_ollama.py +385 -417
  50. openlit/instrumentation/ollama/ollama.py +384 -417
  51. openlit/instrumentation/openai/async_openai.py +7 -9
  52. openlit/instrumentation/openai/openai.py +7 -9
  53. openlit/instrumentation/phidata/phidata.py +4 -2
  54. openlit/instrumentation/pinecone/pinecone.py +5 -5
  55. openlit/instrumentation/premai/__init__.py +2 -2
  56. openlit/instrumentation/premai/premai.py +262 -213
  57. openlit/instrumentation/qdrant/async_qdrant.py +5 -5
  58. openlit/instrumentation/qdrant/qdrant.py +5 -5
  59. openlit/instrumentation/reka/__init__.py +2 -2
  60. openlit/instrumentation/reka/async_reka.py +90 -52
  61. openlit/instrumentation/reka/reka.py +90 -52
  62. openlit/instrumentation/together/__init__.py +4 -4
  63. openlit/instrumentation/together/async_together.py +278 -236
  64. openlit/instrumentation/together/together.py +278 -236
  65. openlit/instrumentation/transformers/__init__.py +1 -1
  66. openlit/instrumentation/transformers/transformers.py +75 -44
  67. openlit/instrumentation/vertexai/__init__.py +14 -64
  68. openlit/instrumentation/vertexai/async_vertexai.py +329 -986
  69. openlit/instrumentation/vertexai/vertexai.py +329 -986
  70. openlit/instrumentation/vllm/__init__.py +1 -1
  71. openlit/instrumentation/vllm/vllm.py +62 -32
  72. openlit/semcov/__init__.py +3 -3
  73. {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
  74. openlit-1.33.10.dist-info/RECORD +122 -0
  75. openlit-1.33.9.dist-info/RECORD +0 -121
  76. {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
  77. {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/WHEEL +0 -0
openlit/instrumentation/mistral/async_mistral.py
@@ -1,29 +1,37 @@
- # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
  """
  Module for monitoring Mistral API calls.
  """

  import logging
+ import time
  from opentelemetry.trace import SpanKind, Status, StatusCode
- from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
- from openlit.__helpers import get_chat_model_cost, get_embed_model_cost, handle_exception
+ from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+ from openlit.__helpers import (
+ get_chat_model_cost,
+ get_embed_model_cost,
+ handle_exception,
+ response_as_dict,
+ calculate_ttft,
+ calculate_tbt,
+ create_metrics_attributes,
+ set_server_address_and_port
+ )
  from openlit.semcov import SemanticConvetion

  # Initialize logger for logging potential issues and operations
  logger = logging.getLogger(__name__)

- def async_chat(gen_ai_endpoint, version, environment, application_name,
- tracer, pricing_info, trace_content, metrics, disable_metrics):
+ def async_chat(version, environment, application_name, tracer,
+ pricing_info, trace_content, metrics, disable_metrics):
  """
  Generates a telemetry wrapper for chat to collect metrics.

  Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
  version: Version of the monitoring package.
  environment: Deployment environment (e.g., production, staging).
- application_name: Name of the application using the OpenAI API.
+ application_name: Name of the application using the Mistral API.
  tracer: OpenTelemetry tracer for creating spans.
- pricing_info: Information used for calculating the cost of OpenAI usage.
+ pricing_info: Information used for calculating the cost of Mistral usage.
  trace_content: Flag indicating whether to trace the actual content.

  Returns:
@@ -47,71 +55,95 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
  The response from the original 'chat' method.
  """

- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
- # Handling exception ensure observability without disrupting operation
+ server_address, server_port = set_server_address_and_port(instance, 'api.mistral.ai', 443)
+ request_model = kwargs.get('model', 'mistral-small-latest')
+
+ span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}'
+
+ with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+ start_time = time.time()
  response = await wrapped(*args, **kwargs)
+ end_time = time.time()
+
+ response_dict = response_as_dict(response)

  try:
  # Format 'messages' into a single string
- message_prompt = kwargs.get('messages', "")
+ message_prompt = kwargs.get('messages', '')
  formatted_messages = []
  for message in message_prompt:
- role = message["role"]
- content = message["content"]
+ role = message['role']
+ content = message['content']

  if isinstance(content, list):
  content_str = ", ".join(
- # pylint: disable=line-too-long
- f"{item['type']}: {item['text'] if 'text' in item else item['image_url']}"
- if 'type' in item else f"text: {item['text']}"
+ f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+ if "type" in item else f'text: {item["text"]}'
  for item in content
  )
- formatted_messages.append(f"{role}: {content_str}")
+ formatted_messages.append(f'{role}: {content_str}')
  else:
- formatted_messages.append(f"{role}: {content}")
- prompt = " ".join(formatted_messages)
+ formatted_messages.append(f'{role}: {content}')
+ prompt = '\n'.join(formatted_messages)
+
+ input_tokens = response_dict.get('usage').get('prompt_tokens')
+ output_tokens = response_dict.get('usage').get('completion_tokens')

  # Calculate cost of the operation
- cost = get_chat_model_cost(kwargs.get("model", "mistral-small-latest"),
- pricing_info, response.usage.prompt_tokens,
- response.usage.completion_tokens)
+ cost = get_chat_model_cost(request_model,
+ pricing_info, input_tokens,
+ output_tokens)

- # Set Span attributes
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
- SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
+ # Set base span attribues (OTel Semconv)
+ span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
  span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
  SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
- response.id)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
- environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
- application_name)
+ span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+ SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get("model", "mistral-small-latest"))
+ request_model)
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+ kwargs.get('seed', ''))
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
+ server_port)
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+ kwargs.get('frequency_penalty', 0.0))
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+ kwargs.get('max_tokens', -1))
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+ kwargs.get('presence_penalty', 0.0))
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+ kwargs.get('stop', []))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
- kwargs.get("temperature", 0.7))
+ kwargs.get('temperature', 1.0))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
- kwargs.get("top_p", 1.0))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
- kwargs.get("max_tokens", -1))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
- kwargs.get("random_seed", ""))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
- False)
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
- [response.choices[0].finish_reason])
+ kwargs.get('top_p', 1.0))
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+ response_dict.get('id'))
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ response_dict.get('model'))
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
- response.usage.prompt_tokens)
+ input_tokens)
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
- response.usage.completion_tokens)
+ output_tokens)
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ server_address)
+
+ # Set base span attribues (Extras)
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+ environment)
+ span.set_attribute(SERVICE_NAME,
+ application_name)
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+ False)
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- response.usage.total_tokens)
+ input_tokens + output_tokens)
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
  cost)
+ span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+ end_time - start_time)
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)
  if trace_content:
  span.add_event(
  name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -119,64 +151,80 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
  SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
  },
  )
- span.add_event(
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
- attributes={
- # pylint: disable=line-too-long
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.choices[0].message.content if response.choices[0].message.content else "",
- },
- )
+
+ for i in range(kwargs.get('n',1)):
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+ [response_dict.get('choices')[i].get('finish_reason')])
+ if trace_content:
+ span.add_event(
+ name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+ attributes={
+ # pylint: disable=line-too-long
+ SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+ },
+ )
+ if kwargs.get('tools'):
+ span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+ str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+ if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ 'text')
+ elif response_dict.get('choices')[i].get('message').get('content') is not None:
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ 'json')

  span.set_status(Status(StatusCode.OK))

  if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_OPERATION:
- SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get("model", "mistral-small-latest")
- }
-
- metrics["genai_requests"].add(1, attributes)
- metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
- metrics["genai_completion_tokens"].add(
- response.usage.completion_tokens, attributes
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+ system=SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
+ request_model=request_model,
+ server_address=server_address,
+ server_port=server_port,
+ response_model=response_dict.get('model'),
  )
- metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
- metrics["genai_cost"].record(cost)
+
+ metrics['genai_client_usage_tokens'].record(
+ input_tokens + output_tokens, attributes
+ )
+ metrics['genai_client_operation_duration'].record(
+ end_time - start_time, attributes
+ )
+ metrics['genai_server_ttft'].record(
+ end_time - start_time, attributes
+ )
+ metrics['genai_requests'].add(1, attributes)
+ metrics['genai_completion_tokens'].add(output_tokens, attributes)
+ metrics['genai_prompt_tokens'].add(input_tokens, attributes)
+ metrics['genai_cost'].record(cost, attributes)

  # Return original response
  return response

  except Exception as e:
  handle_exception(span, e)
- logger.error("Error in trace creation: %s", e)
+ logger.error('Error in trace creation: %s', e)

  # Return original response
  return response

  return wrapper

- def async_chat_stream(gen_ai_endpoint, version, environment, application_name,
- tracer, pricing_info, trace_content, metrics, disable_metrics):
+ def async_chat_stream(version, environment, application_name,
+ tracer, pricing_info, trace_content, metrics, disable_metrics):
  """
  Generates a telemetry wrapper for chat_stream to collect metrics.

  Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
  version: Version of the monitoring package.
  environment: Deployment environment (e.g., production, staging).
- application_name: Name of the application using the OpenAI API.
+ application_name: Name of the application using the Mistral API.
  tracer: OpenTelemetry tracer for creating spans.
- pricing_info: Information used for calculating the cost of OpenAI usage.
+ pricing_info: Information used for calculating the cost of Mistral usage.
  trace_content: Flag indicating whether to trace the actual content.

  Returns:
@@ -200,149 +248,260 @@ def async_chat_stream(gen_ai_endpoint, version, environment, application_name,
  The response from the original 'chat_stream' method.
  """

- async def stream_generator():
- # pylint: disable=line-too-long
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+ class TracedAsyncStream:
+ """
+ Wrapper for streaming responses to collect metrics and trace data.
+ Wraps the 'mistral.syncStream' response to collect message IDs and aggregated response.
+
+ This class implements the '__aiter__' and '__anext__' methods that
+ handle asynchronous streaming responses.
+
+ This class also implements '__aenter__' and '__aexit__' methods that
+ handle asynchronous context management protocol.
+ """
+ def __init__(
+ self,
+ wrapped,
+ span,
+ kwargs,
+ server_address,
+ server_port,
+ **args,
+ ):
+ self.__wrapped__ = wrapped
+ self._span = span
  # Placeholder for aggregating streaming response
- llmresponse = ""
-
- # Loop through streaming events capturing relevant details
- async for event in await wrapped(*args, **kwargs):
- response_id = event.data.id
- llmresponse += event.data.choices[0].delta.content
- if event.data.usage is not None:
- prompt_tokens = event.data.usage.prompt_tokens
- completion_tokens = event.data.usage.completion_tokens
- total_tokens = event.data.usage.total_tokens
- finish_reason = event.data.choices[0].finish_reason
- yield event
-
- # Handling exception ensure observability without disrupting operation
+ self._llmresponse = ''
+ self._response_id = ''
+ self._response_model = ''
+ self._finish_reason = ''
+ self._input_tokens = ''
+ self._output_tokens = ''
+
+ self._args = args
+ self._kwargs = kwargs
+ self._start_time = time.time()
+ self._end_time = None
+ self._timestamps = []
+ self._ttft = 0
+ self._tbt = 0
+ self._server_address = server_address
+ self._server_port = server_port
+
+ async def __aenter__(self):
+ await self.__wrapped__.__aenter__()
+ return self
+
+ async def __aexit__(self, exc_type, exc_value, traceback):
+ await self.__wrapped__.__aexit__(exc_type, exc_value, traceback)
+
+ def __aiter__(self):
+ return self
+
+ async def __getattr__(self, name):
+ """Delegate attribute access to the wrapped object."""
+ return getattr(await self.__wrapped__, name)
+
+ async def __anext__(self):
  try:
- # Format 'messages' into a single string
- message_prompt = kwargs.get('messages', "")
- formatted_messages = []
- for message in message_prompt:
- role = message["role"]
- content = message["content"]
-
- if isinstance(content, list):
- content_str = ", ".join(
- # pylint: disable=line-too-long
- f"{item['type']}: {item['text'] if 'text' in item else item['image_url']}"
- if 'type' in item else f"text: {item['text']}"
- for item in content
- )
- formatted_messages.append(f"{role}: {content_str}")
+ chunk = await self.__wrapped__.__anext__()
+ end_time = time.time()
+ # Record the timestamp for the current chunk
+ self._timestamps.append(end_time)
+
+ if len(self._timestamps) == 1:
+ # Calculate time to first chunk
+ self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
+ chunked = response_as_dict(chunk)
+
+ self._llmresponse += chunked.get('data').get('choices')[0].get('delta').get('content')
+ if chunked.get('data').get('usage') is not None:
+ self._response_id = chunked.get('data').get('id')
+ self._response_model = chunked.get('data').get('model')
+ self._input_tokens = chunked.get('data').get('usage').get('prompt_tokens')
+ self._output_tokens = chunked.get('data').get('usage').get('completion_tokens')
+ self._finish_reason = chunked.get('data').get('choices')[0].get('finish_reason')
+
+ return chunk
+ except StopAsyncIteration:
+ # Handling exception ensure observability without disrupting operation
+ try:
+ self._end_time = time.time()
+ if len(self._timestamps) > 1:
+ self._tbt = calculate_tbt(self._timestamps)
+
+ # Format 'messages' into a single string
+ message_prompt = self._kwargs.get('messages', '')
+ formatted_messages = []
+ for message in message_prompt:
+ role = message['role']
+ content = message['content']
+
+ if isinstance(content, list):
+ content_str_list = []
+ for item in content:
+ if item['type'] == 'text':
+ content_str_list.append(f'text: {item["text"]}')
+ elif (item['type'] == 'image_url' and
+ not item['image_url']['url'].startswith('data:')):
+ content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+ content_str = ", ".join(content_str_list)
+ formatted_messages.append(f'{role}: {content_str}')
+ else:
+ formatted_messages.append(f'{role}: {content}')
+ prompt = '\n'.join(formatted_messages)
+
+ request_model = self._kwargs.get('model', 'mistral-small-latest')
+
+ # Calculate cost of the operation
+ cost = get_chat_model_cost(request_model,
+ pricing_info, self._input_tokens,
+ self._output_tokens)
+
+ # Set Span attributes (OTel Semconv)
+ self._span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+ SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+ SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+ request_model)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+ self._kwargs.get('seed', ''))
+ self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+ self._server_port)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+ self._kwargs.get('frequency_penalty', 0.0))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+ self._kwargs.get('max_tokens', -1))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+ self._kwargs.get('presence_penalty', 0.0))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+ self._kwargs.get('stop_sequences', []))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+ self._kwargs.get('temperature', 0.3))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
+ self._kwargs.get('k', 1.0))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+ self._kwargs.get('p', 1.0))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+ [self._finish_reason])
+ self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+ self._response_id)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ self._response_model)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+ self._input_tokens)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+ self._output_tokens)
+ self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ self._server_address)
+
+ if isinstance(self._llmresponse, str):
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ 'text')
  else:
- formatted_messages.append(f"{role}: {content}")
- prompt = " ".join(formatted_messages)
-
- # Calculate cost of the operation
- cost = get_chat_model_cost(kwargs.get("model", "mistral-small-latest"),
- pricing_info, prompt_tokens, completion_tokens)
-
- # Set Span attributes
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
- SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
- span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
- SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
- response_id)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
- environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
- application_name)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get("model", "mistral-small-latest"))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
- kwargs.get("temperature", 0.7))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
- kwargs.get("top_p", 1.0))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
- kwargs.get("max_tokens", -1))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
- kwargs.get("random_seed", ""))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
- True)
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
- [finish_reason])
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
- prompt_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
- completion_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- total_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
- cost)
- if trace_content:
- span.add_event(
- name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
- attributes={
- SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
- },
- )
- span.add_event(
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
- attributes={
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
- },
- )
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ 'json')
+
+ # Set Span attributes (Extra)
+ self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+ environment)
+ self._span.set_attribute(SERVICE_NAME,
+ application_name)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+ True)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+ self._input_tokens + self._output_tokens)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+ cost)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+ self._tbt)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+ self._ttft)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)
+ if trace_content:
+ self._span.add_event(
+ name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+ attributes={
+ SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+ },
+ )
+ self._span.add_event(
+ name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+ attributes={
+ SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
+ },
+ )
+ self._span.set_status(Status(StatusCode.OK))
+
+ if disable_metrics is False:
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+ system=SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
+ request_model=request_model,
+ server_address=self._server_address,
+ server_port=self._server_port,
+ response_model=self._response_model,
+ )
+
+ metrics['genai_client_usage_tokens'].record(
+ self._input_tokens + self._output_tokens, attributes
+ )
+ metrics['genai_client_operation_duration'].record(
+ self._end_time - self._start_time, attributes
+ )
+ metrics['genai_server_tbt'].record(
+ self._tbt, attributes
+ )
+ metrics['genai_server_ttft'].record(
+ self._ttft, attributes
+ )
+ metrics['genai_requests'].add(1, attributes)
+ metrics['genai_completion_tokens'].add(self._output_tokens, attributes)
+ metrics['genai_prompt_tokens'].add(self._input_tokens, attributes)
+ metrics['genai_cost'].record(cost, attributes)
+
+ except Exception as e:
+ handle_exception(self._span, e)
+ logger.error('Error in trace creation: %s', e)
+ finally:
+ self._span.end()
+ raise

- span.set_status(Status(StatusCode.OK))
-
- if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_OPERATION:
- SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get("model", "mistral-small-latest")
- }
-
- metrics["genai_requests"].add(1, attributes)
- metrics["genai_total_tokens"].add(prompt_tokens + completion_tokens, attributes)
- metrics["genai_completion_tokens"].add(completion_tokens, attributes)
- metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
- metrics["genai_cost"].record(cost)
-
- except Exception as e:
- handle_exception(span, e)
- logger.error("Error in trace creation: %s", e)
-
- return stream_generator()
+ server_address, server_port = set_server_address_and_port(instance, 'api.mistral.ai', 443)
+ request_model = kwargs.get('model', 'mistral-small-latest')
+
+ span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}'
+
+ awaited_wrapped = await wrapped(*args, **kwargs)
+ span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
+ return TracedAsyncStream(awaited_wrapped, span, kwargs, server_address, server_port)

  return wrapper

- def async_embeddings(gen_ai_endpoint, version, environment, application_name,
- tracer, pricing_info, trace_content, metrics, disable_metrics):
+ def async_embeddings(version, environment, application_name,
+ tracer, pricing_info, trace_content, metrics, disable_metrics):
  """
  Generates a telemetry wrapper for embeddings to collect metrics.

  Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
  version: Version of the monitoring package.
  environment: Deployment environment (e.g., production, staging).
- application_name: Name of the application using the OpenAI API.
+ application_name: Name of the application using the Mistral API.
  tracer: OpenTelemetry tracer for creating spans.
- pricing_info: Information used for calculating the cost of OpenAI usage.
+ pricing_info: Information used for calculating the cost of Mistral usage.
  trace_content: Flag indicating whether to trace the actual content.

  Returns:
  A function that wraps the embeddings method to add telemetry.
  """

- async def wrapper(wrapped, instance, args, kwargs):
+ def wrapper(wrapped, instance, args, kwargs):
  """
  Wraps the 'embeddings' API call to add telemetry.

@@ -359,78 +518,92 @@ def async_embeddings(gen_ai_endpoint, version, environment, application_name,
  The response from the original 'embeddings' method.
  """

- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
- response = await wrapped(*args, **kwargs)
+ server_address, server_port = set_server_address_and_port(instance, 'api.mistral.ai', 443)
+ request_model = kwargs.get('model', 'mistral-embed')
+
+ span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
+
+ with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+ start_time = time.time()
+ response = wrapped(*args, **kwargs)
+ end_time = time.time()

+ response_dict = response_as_dict(response)
  try:
- # Get prompt from kwargs and store as a single string
- prompt = ', '.join(kwargs.get('inputs', []))
+ input_tokens = response_dict.get('usage').get('prompt_tokens')

  # Calculate cost of the operation
- cost = get_embed_model_cost(kwargs.get('model', "mistral-embed"),
- pricing_info, response.usage.prompt_tokens)
+ cost = get_embed_model_cost(request_model,
+ pricing_info, input_tokens)

- # Set Span attributes
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
- SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
+ # Set Span attributes (OTel Semconv)
+ span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
  span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
  SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
- environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
- application_name)
+ span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+ SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get('model', "mistral-embed"))
+ request_model)
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
- kwargs.get("encoding_format", "float"))
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
- response.id)
+ [kwargs.get('encoding_format', 'float')])
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ response_dict.get('model'))
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ server_address)
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
+ server_port)
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
- response.usage.prompt_tokens)
+ input_tokens)
+
+ # Set Span attributes (Extras)
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+ environment)
+ span.set_attribute(SERVICE_NAME,
+ application_name)
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- response.usage.total_tokens)
+ input_tokens)
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
  cost)
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)
+
  if trace_content:
  span.add_event(
  name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
  attributes={
- SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+ SemanticConvetion.GEN_AI_CONTENT_PROMPT: str(kwargs.get('inputs', '')),
  },
  )

  span.set_status(Status(StatusCode.OK))

  if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_OPERATION:
- SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get('model', "mistral-embed")
- }
-
- metrics["genai_requests"].add(1, attributes)
- metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
- metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
- metrics["genai_cost"].record(cost, attributes)
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+ system=SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
+ request_model=request_model,
+ server_address=server_address,
+ server_port=server_port,
+ response_model=response_dict.get('model'),
+ )
+ metrics['genai_client_usage_tokens'].record(
+ input_tokens, attributes
+ )
+ metrics['genai_client_operation_duration'].record(
+ end_time - start_time, attributes
+ )
+ metrics['genai_requests'].add(1, attributes)
+ metrics['genai_prompt_tokens'].add(input_tokens, attributes)
+ metrics['genai_cost'].record(cost, attributes)

  # Return original response
  return response

  except Exception as e:
  handle_exception(span, e)
- logger.error("Error in trace creation: %s", e)
+ logger.error('Error in trace creation: %s', e)

  # Return original response
  return response
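
For context on how these rewritten wrappers are exercised: the factories above now drop the gen_ai_endpoint argument and return wrapt-style wrappers (wrapped, instance, args, kwargs) that openlit attaches when instrumentation is initialized. The following is a minimal, hedged usage sketch only, assuming openlit's public openlit.init() entry point and the mistralai 1.x async client; the init parameter names and the SDK method name are illustrative assumptions and are not taken from this diff.

# Hedged sketch, not part of this diff: application code whose async chat call
# would be wrapped by async_chat() above (one CLIENT span named after the
# operation and request model, plus token, cost, and TTFT metrics).
import asyncio
import openlit
from mistralai import Mistral

openlit.init(application_name='demo-app', environment='staging')  # parameter names assumed

async def main():
    client = Mistral(api_key='YOUR_API_KEY')
    response = await client.chat.complete_async(  # method name assumed for mistralai 1.x
        model='mistral-small-latest',
        messages=[{'role': 'user', 'content': 'Hello'}],
    )
    print(response.choices[0].message.content)

asyncio.run(main())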