openlit 1.33.8__py3-none-any.whl → 1.33.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. openlit/__helpers.py +88 -0
  2. openlit/__init__.py +4 -3
  3. openlit/instrumentation/ag2/ag2.py +5 -5
  4. openlit/instrumentation/ai21/__init__.py +4 -4
  5. openlit/instrumentation/ai21/ai21.py +370 -319
  6. openlit/instrumentation/ai21/async_ai21.py +371 -319
  7. openlit/instrumentation/anthropic/__init__.py +4 -4
  8. openlit/instrumentation/anthropic/anthropic.py +321 -189
  9. openlit/instrumentation/anthropic/async_anthropic.py +323 -190
  10. openlit/instrumentation/assemblyai/__init__.py +1 -1
  11. openlit/instrumentation/assemblyai/assemblyai.py +59 -43
  12. openlit/instrumentation/astra/astra.py +9 -9
  13. openlit/instrumentation/astra/async_astra.py +9 -9
  14. openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
  15. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
  16. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
  17. openlit/instrumentation/bedrock/__init__.py +1 -1
  18. openlit/instrumentation/bedrock/bedrock.py +115 -58
  19. openlit/instrumentation/chroma/chroma.py +9 -9
  20. openlit/instrumentation/cohere/__init__.py +33 -10
  21. openlit/instrumentation/cohere/async_cohere.py +610 -0
  22. openlit/instrumentation/cohere/cohere.py +410 -219
  23. openlit/instrumentation/controlflow/controlflow.py +5 -5
  24. openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
  25. openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
  26. openlit/instrumentation/crewai/crewai.py +6 -4
  27. openlit/instrumentation/dynamiq/dynamiq.py +5 -5
  28. openlit/instrumentation/elevenlabs/async_elevenlabs.py +71 -46
  29. openlit/instrumentation/elevenlabs/elevenlabs.py +71 -51
  30. openlit/instrumentation/embedchain/embedchain.py +9 -9
  31. openlit/instrumentation/firecrawl/firecrawl.py +5 -5
  32. openlit/instrumentation/google_ai_studio/__init__.py +9 -9
  33. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
  34. openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
  35. openlit/instrumentation/gpt4all/__init__.py +2 -2
  36. openlit/instrumentation/gpt4all/gpt4all.py +345 -220
  37. openlit/instrumentation/gpu/__init__.py +5 -5
  38. openlit/instrumentation/groq/__init__.py +2 -2
  39. openlit/instrumentation/groq/async_groq.py +356 -240
  40. openlit/instrumentation/groq/groq.py +356 -240
  41. openlit/instrumentation/haystack/haystack.py +5 -5
  42. openlit/instrumentation/julep/async_julep.py +5 -5
  43. openlit/instrumentation/julep/julep.py +5 -5
  44. openlit/instrumentation/langchain/__init__.py +13 -7
  45. openlit/instrumentation/langchain/async_langchain.py +384 -0
  46. openlit/instrumentation/langchain/langchain.py +105 -492
  47. openlit/instrumentation/letta/letta.py +11 -9
  48. openlit/instrumentation/litellm/__init__.py +4 -5
  49. openlit/instrumentation/litellm/async_litellm.py +318 -247
  50. openlit/instrumentation/litellm/litellm.py +314 -243
  51. openlit/instrumentation/llamaindex/llamaindex.py +5 -5
  52. openlit/instrumentation/mem0/mem0.py +5 -5
  53. openlit/instrumentation/milvus/milvus.py +9 -9
  54. openlit/instrumentation/mistral/__init__.py +6 -6
  55. openlit/instrumentation/mistral/async_mistral.py +423 -250
  56. openlit/instrumentation/mistral/mistral.py +420 -246
  57. openlit/instrumentation/multion/async_multion.py +6 -4
  58. openlit/instrumentation/multion/multion.py +6 -4
  59. openlit/instrumentation/ollama/__init__.py +8 -30
  60. openlit/instrumentation/ollama/async_ollama.py +385 -417
  61. openlit/instrumentation/ollama/ollama.py +384 -417
  62. openlit/instrumentation/openai/__init__.py +11 -230
  63. openlit/instrumentation/openai/async_openai.py +433 -410
  64. openlit/instrumentation/openai/openai.py +414 -394
  65. openlit/instrumentation/phidata/phidata.py +6 -4
  66. openlit/instrumentation/pinecone/pinecone.py +9 -9
  67. openlit/instrumentation/premai/__init__.py +2 -2
  68. openlit/instrumentation/premai/premai.py +262 -213
  69. openlit/instrumentation/qdrant/async_qdrant.py +9 -9
  70. openlit/instrumentation/qdrant/qdrant.py +9 -9
  71. openlit/instrumentation/reka/__init__.py +2 -2
  72. openlit/instrumentation/reka/async_reka.py +90 -52
  73. openlit/instrumentation/reka/reka.py +90 -52
  74. openlit/instrumentation/together/__init__.py +4 -4
  75. openlit/instrumentation/together/async_together.py +278 -236
  76. openlit/instrumentation/together/together.py +278 -236
  77. openlit/instrumentation/transformers/__init__.py +1 -1
  78. openlit/instrumentation/transformers/transformers.py +76 -45
  79. openlit/instrumentation/vertexai/__init__.py +14 -64
  80. openlit/instrumentation/vertexai/async_vertexai.py +330 -987
  81. openlit/instrumentation/vertexai/vertexai.py +330 -987
  82. openlit/instrumentation/vllm/__init__.py +1 -1
  83. openlit/instrumentation/vllm/vllm.py +66 -36
  84. openlit/otel/metrics.py +98 -7
  85. openlit/semcov/__init__.py +113 -80
  86. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
  87. openlit-1.33.10.dist-info/RECORD +122 -0
  88. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/WHEEL +1 -1
  89. openlit/instrumentation/openai/async_azure_openai.py +0 -900
  90. openlit/instrumentation/openai/azure_openai.py +0 -898
  91. openlit-1.33.8.dist-info/RECORD +0 -122
  92. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
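
The largest block of changes sits in the per-provider instrumentation modules; the excerpt below, from the Mistral instrumentation, is typical of the rewrite: the wrapper factories drop the old gen_ai_endpoint argument, spans are named after the operation and request model, and metrics flow through the new create_metrics_attributes helper. As a quick orientation, here is a minimal sketch of how an application would pick up the upgraded instrumentation; the otlp_endpoint value is a placeholder, and the keyword arguments shown are commonly documented openlit.init() options rather than anything introduced in this release. The application_name and environment values are what the new code records on each span as service.name and deployment.environment.

# pip install --upgrade openlit==1.33.10
import openlit

openlit.init(
    otlp_endpoint="http://127.0.0.1:4318",  # placeholder collector address
    application_name="demo-app",            # recorded on spans as service.name
    environment="staging",                  # recorded on spans as deployment.environment
)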
@@ -1,29 +1,37 @@
- # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
  """
  Module for monitoring Mistral API calls.
  """

  import logging
+ import time
  from opentelemetry.trace import SpanKind, Status, StatusCode
- from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
- from openlit.__helpers import get_chat_model_cost, get_embed_model_cost, handle_exception
+ from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+ from openlit.__helpers import (
+ get_chat_model_cost,
+ get_embed_model_cost,
+ handle_exception,
+ response_as_dict,
+ calculate_ttft,
+ calculate_tbt,
+ create_metrics_attributes,
+ set_server_address_and_port
+ )
  from openlit.semcov import SemanticConvetion

  # Initialize logger for logging potential issues and operations
  logger = logging.getLogger(__name__)

- def chat(gen_ai_endpoint, version, environment, application_name,
- tracer, pricing_info, trace_content, metrics, disable_metrics):
+ def chat(version, environment, application_name, tracer,
+ pricing_info, trace_content, metrics, disable_metrics):
  """
  Generates a telemetry wrapper for chat to collect metrics.

  Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
  version: Version of the monitoring package.
  environment: Deployment environment (e.g., production, staging).
- application_name: Name of the application using the OpenAI API.
+ application_name: Name of the application using the Mistral API.
  tracer: OpenTelemetry tracer for creating spans.
- pricing_info: Information used for calculating the cost of OpenAI usage.
+ pricing_info: Information used for calculating the cost of Mistral usage.
  trace_content: Flag indicating whether to trace the actual content.

  Returns:
@@ -47,70 +55,95 @@ def chat(gen_ai_endpoint, version, environment, application_name,
  The response from the original 'chat' method.
  """

- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+ server_address, server_port = set_server_address_and_port(instance, 'api.mistral.ai', 443)
+ request_model = kwargs.get('model', 'mistral-small-latest')
+
+ span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}'
+
+ with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+ start_time = time.time()
  response = wrapped(*args, **kwargs)
+ end_time = time.time()
+
+ response_dict = response_as_dict(response)

  try:
  # Format 'messages' into a single string
- message_prompt = kwargs.get('messages', "")
+ message_prompt = kwargs.get('messages', '')
  formatted_messages = []
  for message in message_prompt:
- role = message["role"]
- content = message["content"]
+ role = message['role']
+ content = message['content']

  if isinstance(content, list):
  content_str = ", ".join(
- # pylint: disable=line-too-long
- f"{item['type']}: {item['text'] if 'text' in item else item['image_url']}"
- if 'type' in item else f"text: {item['text']}"
+ f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+ if "type" in item else f'text: {item["text"]}'
  for item in content
  )
- formatted_messages.append(f"{role}: {content_str}")
+ formatted_messages.append(f'{role}: {content_str}')
  else:
- formatted_messages.append(f"{role}: {content}")
- prompt = " ".join(formatted_messages)
+ formatted_messages.append(f'{role}: {content}')
+ prompt = '\n'.join(formatted_messages)

- # Calculate cost of the operation
- cost = get_chat_model_cost(kwargs.get("model", "mistral-small-latest"),
- pricing_info, response.usage.prompt_tokens,
- response.usage.completion_tokens)
+ input_tokens = response_dict.get('usage').get('prompt_tokens')
+ output_tokens = response_dict.get('usage').get('completion_tokens')

- # Set Span attributes
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+ # Calculate cost of the operation
+ cost = get_chat_model_cost(request_model,
+ pricing_info, input_tokens,
+ output_tokens)
+
+ # Set base span attribues (OTel Semconv)
+ span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
+ span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+ SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
  span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
  SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
- SemanticConvetion.GEN_AI_TYPE_CHAT)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
- response.id)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
- environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
- application_name)
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get("model", "mistral-small-latest"))
+ request_model)
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+ kwargs.get('seed', ''))
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
+ server_port)
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+ kwargs.get('frequency_penalty', 0.0))
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+ kwargs.get('max_tokens', -1))
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+ kwargs.get('presence_penalty', 0.0))
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+ kwargs.get('stop', []))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
- kwargs.get("temperature", 0.7))
+ kwargs.get('temperature', 1.0))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
- kwargs.get("top_p", 1.0))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
- kwargs.get("max_tokens", -1))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
- kwargs.get("random_seed", ""))
+ kwargs.get('top_p', 1.0))
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+ response_dict.get('id'))
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ response_dict.get('model'))
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+ input_tokens)
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+ output_tokens)
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ server_address)
+
+ # Set base span attribues (Extras)
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+ environment)
+ span.set_attribute(SERVICE_NAME,
+ application_name)
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
  False)
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
- [response.choices[0].finish_reason])
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
- response.usage.prompt_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
- response.usage.completion_tokens)
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- response.usage.total_tokens)
+ input_tokens + output_tokens)
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
  cost)
+ span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+ end_time - start_time)
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)
  if trace_content:
  span.add_event(
  name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -118,64 +151,80 @@ def chat(gen_ai_endpoint, version, environment, application_name,
  SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
  },
  )
- span.add_event(
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
- # pylint: disable=line-too-long
- attributes={
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.choices[0].message.content if response.choices[0].message.content else "",
- },
- )
+
+ for i in range(kwargs.get('n',1)):
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+ [response_dict.get('choices')[i].get('finish_reason')])
+ if trace_content:
+ span.add_event(
+ name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+ attributes={
+ # pylint: disable=line-too-long
+ SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+ },
+ )
+ if kwargs.get('tools'):
+ span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+ str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+ if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ 'text')
+ elif response_dict.get('choices')[i].get('message').get('content') is not None:
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ 'json')

  span.set_status(Status(StatusCode.OK))

  if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_TYPE:
- SemanticConvetion.GEN_AI_TYPE_CHAT,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get("model", "mistral-small-latest")
- }
-
- metrics["genai_requests"].add(1, attributes)
- metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
- metrics["genai_completion_tokens"].add(
- response.usage.completion_tokens, attributes
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+ system=SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
+ request_model=request_model,
+ server_address=server_address,
+ server_port=server_port,
+ response_model=response_dict.get('model'),
+ )
+
+ metrics['genai_client_usage_tokens'].record(
+ input_tokens + output_tokens, attributes
+ )
+ metrics['genai_client_operation_duration'].record(
+ end_time - start_time, attributes
  )
- metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
- metrics["genai_cost"].record(cost, attributes)
+ metrics['genai_server_ttft'].record(
+ end_time - start_time, attributes
+ )
+ metrics['genai_requests'].add(1, attributes)
+ metrics['genai_completion_tokens'].add(output_tokens, attributes)
+ metrics['genai_prompt_tokens'].add(input_tokens, attributes)
+ metrics['genai_cost'].record(cost, attributes)

  # Return original response
  return response

  except Exception as e:
  handle_exception(span, e)
- logger.error("Error in trace creation: %s", e)
+ logger.error('Error in trace creation: %s', e)

  # Return original response
  return response

  return wrapper

- def chat_stream(gen_ai_endpoint, version, environment, application_name,
+ def chat_stream(version, environment, application_name,
  tracer, pricing_info, trace_content, metrics, disable_metrics):
  """
  Generates a telemetry wrapper for chat_stream to collect metrics.

  Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
  version: Version of the monitoring package.
  environment: Deployment environment (e.g., production, staging).
- application_name: Name of the application using the OpenAI API.
+ application_name: Name of the application using the Mistral API.
  tracer: OpenTelemetry tracer for creating spans.
- pricing_info: Information used for calculating the cost of OpenAI usage.
+ pricing_info: Information used for calculating the cost of Mistral usage.
  trace_content: Flag indicating whether to trace the actual content.

  Returns:
@@ -199,142 +248,253 @@ def chat_stream(gen_ai_endpoint, version, environment, application_name,
  The response from the original 'chat_stream' method.
  """

- def stream_generator():
- # pylint: disable=line-too-long
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+ class TracedSyncStream:
+ """
+ Wrapper for streaming responses to collect metrics and trace data.
+ Wraps the 'mistral.syncStream' response to collect message IDs and aggregated response.
+
+ This class implements the '__aiter__' and '__anext__' methods that
+ handle asynchronous streaming responses.
+
+ This class also implements '__aenter__' and '__aexit__' methods that
+ handle asynchronous context management protocol.
+ """
+ def __init__(
+ self,
+ wrapped,
+ span,
+ kwargs,
+ server_address,
+ server_port,
+ **args,
+ ):
+ self.__wrapped__ = wrapped
+ self._span = span
  # Placeholder for aggregating streaming response
- llmresponse = ""
-
- # Loop through streaming events capturing relevant details
- for event in wrapped(*args, **kwargs):
- response_id = event.data.id
- llmresponse += event.data.choices[0].delta.content
- if event.data.usage is not None:
- prompt_tokens = event.data.usage.prompt_tokens
- completion_tokens = event.data.usage.completion_tokens
- total_tokens = event.data.usage.total_tokens
- finish_reason = event.data.choices[0].finish_reason
- yield event
-
- # Handling exception ensure observability without disrupting operation
+ self._llmresponse = ''
+ self._response_id = ''
+ self._response_model = ''
+ self._finish_reason = ''
+ self._input_tokens = ''
+ self._output_tokens = ''
+
+ self._args = args
+ self._kwargs = kwargs
+ self._start_time = time.time()
+ self._end_time = None
+ self._timestamps = []
+ self._ttft = 0
+ self._tbt = 0
+ self._server_address = server_address
+ self._server_port = server_port
+
+ def __enter__(self):
+ self.__wrapped__.__enter__()
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.__wrapped__.__exit__(exc_type, exc_value, traceback)
+
+ def __iter__(self):
+ return self
+
+ def __getattr__(self, name):
+ """Delegate attribute access to the wrapped object."""
+ return getattr(self.__wrapped__, name)
+
+ def __next__(self):
  try:
- # Format 'messages' into a single string
- message_prompt = kwargs.get('messages', "")
- formatted_messages = []
- for message in message_prompt:
- role = message["role"]
- content = message["content"]
-
- if isinstance(content, list):
- content_str = ", ".join(
- # pylint: disable=line-too-long
- f"{item['type']}: {item['text'] if 'text' in item else item['image_url']}"
- if 'type' in item else f"text: {item['text']}"
- for item in content
- )
- formatted_messages.append(f"{role}: {content_str}")
+ chunk = self.__wrapped__.__next__()
+ end_time = time.time()
+ # Record the timestamp for the current chunk
+ self._timestamps.append(end_time)
+
+ if len(self._timestamps) == 1:
+ # Calculate time to first chunk
+ self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
+ chunked = response_as_dict(chunk)
+
+ self._llmresponse += chunked.get('data').get('choices')[0].get('delta').get('content')
+ if chunked.get('data').get('usage') is not None:
+ self._response_id = chunked.get('data').get('id')
+ self._response_model = chunked.get('data').get('model')
+ self._input_tokens = chunked.get('data').get('usage').get('prompt_tokens')
+ self._output_tokens = chunked.get('data').get('usage').get('completion_tokens')
+ self._finish_reason = chunked.get('data').get('choices')[0].get('finish_reason')
+
+ return chunk
+ except StopIteration:
+ # Handling exception ensure observability without disrupting operation
+ try:
+ self._end_time = time.time()
+ if len(self._timestamps) > 1:
+ self._tbt = calculate_tbt(self._timestamps)
+
+ # Format 'messages' into a single string
+ message_prompt = self._kwargs.get('messages', '')
+ formatted_messages = []
+ for message in message_prompt:
+ role = message['role']
+ content = message['content']
+
+ if isinstance(content, list):
+ content_str_list = []
+ for item in content:
+ if item['type'] == 'text':
+ content_str_list.append(f'text: {item["text"]}')
+ elif (item['type'] == 'image_url' and
+ not item['image_url']['url'].startswith('data:')):
+ content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+ content_str = ", ".join(content_str_list)
+ formatted_messages.append(f'{role}: {content_str}')
+ else:
+ formatted_messages.append(f'{role}: {content}')
+ prompt = '\n'.join(formatted_messages)
+
+ request_model = self._kwargs.get('model', 'mistral-small-latest')
+
+ # Calculate cost of the operation
+ cost = get_chat_model_cost(request_model,
+ pricing_info, self._input_tokens,
+ self._output_tokens)
+
+ # Set Span attributes (OTel Semconv)
+ self._span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+ SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+ SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+ request_model)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+ self._kwargs.get('seed', ''))
+ self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+ self._server_port)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+ self._kwargs.get('frequency_penalty', 0.0))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+ self._kwargs.get('max_tokens', -1))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+ self._kwargs.get('presence_penalty', 0.0))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+ self._kwargs.get('stop_sequences', []))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+ self._kwargs.get('temperature', 0.3))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
+ self._kwargs.get('k', 1.0))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+ self._kwargs.get('p', 1.0))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+ [self._finish_reason])
+ self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+ self._response_id)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ self._response_model)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+ self._input_tokens)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+ self._output_tokens)
+ self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ self._server_address)
+
+ if isinstance(self._llmresponse, str):
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ 'text')
  else:
- formatted_messages.append(f"{role}: {content}")
- prompt = " ".join(formatted_messages)
-
- # Calculate cost of the operation
- cost = get_chat_model_cost(kwargs.get("model", "mistral-small-latest"),
- pricing_info, prompt_tokens, completion_tokens)
-
- # Set Span attributes
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
- SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
- SemanticConvetion.GEN_AI_TYPE_CHAT)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
- response_id)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
- environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
- application_name)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get("model", "mistral-small-latest"))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
- kwargs.get("temperature", 0.7))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
- kwargs.get("top_p", 1.0))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
- kwargs.get("max_tokens", -1))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
- kwargs.get("random_seed", ""))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
- True)
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
- [finish_reason])
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
- prompt_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
- completion_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- total_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
- cost)
- if trace_content:
- span.add_event(
- name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
- attributes={
- SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
- },
- )
- span.add_event(
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
- attributes={
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
- },
- )
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ 'json')
+
+ # Set Span attributes (Extra)
+ self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+ environment)
+ self._span.set_attribute(SERVICE_NAME,
+ application_name)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+ True)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+ self._input_tokens + self._output_tokens)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+ cost)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+ self._tbt)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+ self._ttft)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)
+ if trace_content:
+ self._span.add_event(
+ name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+ attributes={
+ SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+ },
+ )
+ self._span.add_event(
+ name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+ attributes={
+ SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
+ },
+ )
+ self._span.set_status(Status(StatusCode.OK))
+
+ if disable_metrics is False:
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+ system=SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
+ request_model=request_model,
+ server_address=self._server_address,
+ server_port=self._server_port,
+ response_model=self._response_model,
+ )

- span.set_status(Status(StatusCode.OK))
-
- if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_TYPE:
- SemanticConvetion.GEN_AI_TYPE_CHAT,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get("model", "mistral-small-latest")
- }
-
- metrics["genai_requests"].add(1, attributes)
- metrics["genai_total_tokens"].add(prompt_tokens + completion_tokens, attributes)
- metrics["genai_completion_tokens"].add(completion_tokens, attributes)
- metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
- metrics["genai_cost"].record(cost)
-
- except Exception as e:
- handle_exception(span, e)
- logger.error("Error in trace creation: %s", e)
-
- return stream_generator()
+ metrics['genai_client_usage_tokens'].record(
+ self._input_tokens + self._output_tokens, attributes
+ )
+ metrics['genai_client_operation_duration'].record(
+ self._end_time - self._start_time, attributes
+ )
+ metrics['genai_server_tbt'].record(
+ self._tbt, attributes
+ )
+ metrics['genai_server_ttft'].record(
+ self._ttft, attributes
+ )
+ metrics['genai_requests'].add(1, attributes)
+ metrics['genai_completion_tokens'].add(self._output_tokens, attributes)
+ metrics['genai_prompt_tokens'].add(self._input_tokens, attributes)
+ metrics['genai_cost'].record(cost, attributes)
+
+ except Exception as e:
+ handle_exception(self._span, e)
+ logger.error('Error in trace creation: %s', e)
+ finally:
+ self._span.end()
+ raise
+
+ server_address, server_port = set_server_address_and_port(instance, 'api.mistral.ai', 443)
+ request_model = kwargs.get('model', 'mistral-small-latest')
+
+ span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}'
+
+ awaited_wrapped = wrapped(*args, **kwargs)
+ span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
+ return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)

  return wrapper

- def embeddings(gen_ai_endpoint, version, environment, application_name,
- tracer, pricing_info, trace_content, metrics, disable_metrics):
+ def embeddings(version, environment, application_name,
+ tracer, pricing_info, trace_content, metrics, disable_metrics):
  """
  Generates a telemetry wrapper for embeddings to collect metrics.

  Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
  version: Version of the monitoring package.
  environment: Deployment environment (e.g., production, staging).
- application_name: Name of the application using the OpenAI API.
+ application_name: Name of the application using the Mistral API.
  tracer: OpenTelemetry tracer for creating spans.
- pricing_info: Information used for calculating the cost of OpenAI usage.
+ pricing_info: Information used for calculating the cost of Mistral usage.
  trace_content: Flag indicating whether to trace the actual content.

  Returns:
@@ -358,78 +518,92 @@ def embeddings(gen_ai_endpoint, version, environment, application_name,
  The response from the original 'embeddings' method.
  """

- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+ server_address, server_port = set_server_address_and_port(instance, 'api.mistral.ai', 443)
+ request_model = kwargs.get('model', 'mistral-embed')
+
+ span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
+
+ with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+ start_time = time.time()
  response = wrapped(*args, **kwargs)
+ end_time = time.time()

+ response_dict = response_as_dict(response)
  try:
- # Get prompt from kwargs and store as a single string
- prompt = ', '.join(kwargs.get('inputs', []))
+ input_tokens = response_dict.get('usage').get('prompt_tokens')

  # Calculate cost of the operation
- cost = get_embed_model_cost(kwargs.get('model', "mistral-embed"),
- pricing_info, response.usage.prompt_tokens)
+ cost = get_embed_model_cost(request_model,
+ pricing_info, input_tokens)

- # Set Span attributes
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+ # Set Span attributes (OTel Semconv)
+ span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
+ span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+ SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
  span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
  SemanticConvetion.GEN_AI_SYSTEM_MISTRAL)
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
- SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
+ [kwargs.get('encoding_format', 'float')])
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ response_dict.get('model'))
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ server_address)
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
+ server_port)
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+ input_tokens)
+
+ # Set Span attributes (Extras)
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
  environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+ span.set_attribute(SERVICE_NAME,
  application_name)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get('model', "mistral-embed"))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
- kwargs.get("encoding_format", "float"))
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
- response.id)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
- response.usage.prompt_tokens)
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- response.usage.total_tokens)
+ input_tokens)
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
  cost)
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)
+
  if trace_content:
  span.add_event(
  name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
  attributes={
- SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+ SemanticConvetion.GEN_AI_CONTENT_PROMPT: str(kwargs.get('inputs', '')),
  },
  )

  span.set_status(Status(StatusCode.OK))

  if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_TYPE:
- SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get('model', "mistral-embed")
- }
-
- metrics["genai_requests"].add(1, attributes)
- metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
- metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
- metrics["genai_cost"].record(cost, attributes)
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+ system=SemanticConvetion.GEN_AI_SYSTEM_MISTRAL,
+ request_model=request_model,
+ server_address=server_address,
+ server_port=server_port,
+ response_model=response_dict.get('model'),
+ )
+ metrics['genai_client_usage_tokens'].record(
+ input_tokens, attributes
+ )
+ metrics['genai_client_operation_duration'].record(
+ end_time - start_time, attributes
+ )
+ metrics['genai_requests'].add(1, attributes)
+ metrics['genai_prompt_tokens'].add(input_tokens, attributes)
+ metrics['genai_cost'].record(cost, attributes)

  # Return original response
  return response

  except Exception as e:
  handle_exception(span, e)
- logger.error("Error in trace creation: %s", e)
+ logger.error('Error in trace creation: %s', e)

  # Return original response
  return response
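
The chat_stream change above is the most structural one in this file: the inline stream_generator() is replaced by a TracedSyncStream class that passes chunks through, accumulates usage, and only finalizes the span when the underlying iterator raises StopIteration. Below is a compressed, self-contained sketch of that wrapping pattern, using simplified names and a plain callback instead of an OpenTelemetry span; it is not the package's actual class.

import time

class TimedStream:
    """Illustrative stand-in for the TracedSyncStream idea: delegate iteration,
    time the chunks, and report once the stream is exhausted."""

    def __init__(self, wrapped, on_finish):
        self.__wrapped__ = wrapped      # the real chunk iterator
        self._on_finish = on_finish     # called once, after StopIteration
        self._start_time = time.time()
        self._timestamps = []           # arrival time of each chunk
        self._chunks = []

    def __iter__(self):
        return self

    def __getattr__(self, name):
        # Anything we don't define is looked up on the wrapped object.
        return getattr(self.__wrapped__, name)

    def __next__(self):
        try:
            chunk = next(self.__wrapped__)
        except StopIteration:
            ttft = (self._timestamps[0] - self._start_time) if self._timestamps else 0.0
            self._on_finish(self._chunks, ttft)  # e.g. set span attributes, end the span
            raise                                # keep normal iterator semantics
        self._timestamps.append(time.time())
        self._chunks.append(chunk)
        return chunk

# Consuming the wrapped stream looks exactly like consuming the original one.
stream = TimedStream(iter(["Hel", "lo"]), lambda chunks, ttft: print("".join(chunks), round(ttft, 6)))
assert list(stream) == ["Hel", "lo"]

The real class additionally proxies __enter__/__exit__ so the wrapped response can still be used as a context manager, which is what the corresponding methods in the diff above do.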