openlit 1.34.16__py3-none-any.whl → 1.34.18__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
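Both files below belong to openlit's auto-instrumentation layer, which is enabled through the SDK's top-level entry point. As a minimal, hedged sketch of standard openlit usage (not part of this diff), the keyword arguments read by the instrumentors below are forwarded from openlit.init():

    import openlit

    # openlit.init() registers the provider instrumentors (Cohere, Mistral, ...)
    # and forwards these settings into each _instrument() call shown below.
    openlit.init(
        application_name="demo-app",   # read as kwargs["application_name"]
        environment="dev",             # read as kwargs["environment"]
        capture_message_content=True,  # gates prompt/completion capture
        disable_metrics=False,         # gates the record_*_metrics calls
    )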
@@ -0,0 +1,330 @@
+ """
+ Cohere OpenTelemetry instrumentation utility functions
+ """
+ import time
+
+ from opentelemetry.trace import Status, StatusCode
+
+ from openlit.__helpers import (
+     calculate_ttft,
+     response_as_dict,
+     calculate_tbt,
+     get_chat_model_cost,
+     get_embed_model_cost,
+     common_span_attributes,
+     record_completion_metrics,
+     record_embedding_metrics,
+ )
+ from openlit.semcov import SemanticConvention
+
+ def format_content(messages):
+     """
+     Process a list of messages to extract content.
+     """
+
+     formatted_messages = []
+     for message in messages:
+         # Handle both dictionary and object formats
+         if isinstance(message, dict):
+             role = message.get("role", "user")
+             content = message.get("content", "")
+         else:
+             # Handle Cohere object format (e.g., cohere.UserChatMessageV2)
+             role = getattr(message, "role", "user")
+             content = getattr(message, "content", "")
+
+         if isinstance(content, list):
+             content_str = ", ".join(
+                 f'{item["type"]}: {item["text"] if "text" in item else item.get("image_url", "")}'
+                 if "type" in item else f'text: {item.get("text", "")}'
+                 for item in content
+             )
+             formatted_messages.append(f"{role}: {content_str}")
+         else:
+             formatted_messages.append(f"{role}: {content}")
+
+     return "\n".join(formatted_messages)
+
+ def process_chunk(scope, chunk):
+     """
+     Process a chunk of response data and update state.
+     """
+
+     end_time = time.time()
+     # Record the timestamp for the current chunk
+     scope._timestamps.append(end_time)
+
+     if len(scope._timestamps) == 1:
+         # Calculate time to first chunk
+         scope._ttft = calculate_ttft(scope._timestamps, scope._start_time)
+
+     chunked = response_as_dict(chunk)
+
+     # Handle different chunk types for Cohere streaming
+     if chunked.get("type") == "message-start":
+         scope._response_id = chunked.get("id")
+
+     if chunked.get("type") == "content-delta":
+         content = chunked.get("delta", {}).get("message", {}).get("content", {}).get("text")
+         if content:
+             scope._llmresponse += content
+
+     # Handle tool plan deltas
+     if chunked.get("type") == "tool-plan-delta":
+         tool_plan_text = chunked.get("delta", {}).get("message", {}).get("tool_plan", "")
+         if tool_plan_text:
+             if not hasattr(scope, "_tool_plan"):
+                 scope._tool_plan = ""
+             scope._tool_plan += tool_plan_text
+
+     # Handle tool call start
+     if chunked.get("type") == "tool-call-start":
+         if not hasattr(scope, "_tools") or scope._tools is None:
+             scope._tools = []
+
+         index = chunked.get("index", 0)
+         tool_call = chunked.get("delta", {}).get("message", {}).get("tool_calls", {})
+
+         # Extend list if needed
+         scope._tools.extend([{}] * (index + 1 - len(scope._tools)))
+
+         # Initialize tool call
+         scope._tools[index] = {
+             "id": tool_call.get("id", ""),
+             "type": tool_call.get("type", "function"),
+             "function": {
+                 "name": tool_call.get("function", {}).get("name", ""),
+                 "arguments": ""
+             }
+         }
+
+     # Handle tool call deltas (arguments)
+     if chunked.get("type") == "tool-call-delta":
+         if hasattr(scope, "_tools") and scope._tools:
+             index = chunked.get("index", 0)
+             if index < len(scope._tools):
+                 arguments = chunked.get("delta", {}).get("message", {}).get("tool_calls", {}).get("function", {}).get("arguments", "")
+                 if arguments:
+                     scope._tools[index]["function"]["arguments"] += arguments
+
+     if chunked.get("type") == "message-end":
+         delta = chunked.get("delta", {})
+         scope._finish_reason = delta.get("finish_reason", "")
+         usage = delta.get("usage", {}).get("billed_units", {})
+         scope._input_tokens = usage.get("input_tokens", 0)
+         scope._output_tokens = usage.get("output_tokens", 0)
+         scope._end_time = time.time()
+
+ def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+     capture_message_content, disable_metrics, version, is_stream):
+     """
+     Process chat request and generate Telemetry
+     """
+
+     if len(scope._timestamps) > 1:
+         scope._tbt = calculate_tbt(scope._timestamps)
+
+     prompt = format_content(scope._kwargs.get("messages", []))
+     request_model = scope._kwargs.get("model", "command-r-plus-08-2024")
+
+     cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
+
+     # Common Span Attributes
+     common_span_attributes(scope,
+         SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_COHERE,
+         scope._server_address, scope._server_port, request_model, scope._response_model,
+         environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+     # Span Attributes for Request parameters
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED, scope._kwargs.get("seed", ""))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, scope._kwargs.get("frequency_penalty", 0.0))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, scope._kwargs.get("max_tokens", -1))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, scope._kwargs.get("presence_penalty", 0.0))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, scope._kwargs.get("stop_sequences", []))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get("temperature", 0.3))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K, scope._kwargs.get("k", 1.0))
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, scope._kwargs.get("p", 1.0))
+
+     # Span Attributes for Response parameters
+     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+     scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")
+
+     # Span Attributes for Cost and Tokens
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+     # Span Attributes for Tools - optimized
+     if scope._tools:
+         tools = scope._tools if isinstance(scope._tools, list) else [scope._tools]
+
+         names, ids, args = zip(*[
+             (t.get("function", {}).get("name", ""),
+              str(t.get("id", "")),
+              str(t.get("function", {}).get("arguments", "")))
+             for t in tools if isinstance(t, dict) and t
+         ]) if tools else ([], [], [])
+
+         scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_NAME, ", ".join(filter(None, names)))
+         scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALL_ID, ", ".join(filter(None, ids)))
+         scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_ARGS, ", ".join(filter(None, args)))
+
+     # Span Attributes for Tool Plan (Cohere specific)
+     if hasattr(scope, "_tool_plan") and scope._tool_plan:
+         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_REASONING, scope._tool_plan)
+
+     # Span Attributes for Content
+     if capture_message_content:
+         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+
+         # To be removed once the change to span_attributes (from span events) is complete
+         scope._span.add_event(
+             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+             attributes={
+                 SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
+             },
+         )
+         scope._span.add_event(
+             name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+             attributes={
+                 SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+             },
+         )
+
+     scope._span.set_status(Status(StatusCode.OK))
+
+     # Metrics
+     if not disable_metrics:
+         record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_COHERE,
+             scope._server_address, scope._server_port, request_model, scope._response_model, environment,
+             application_name, scope._start_time, scope._end_time, scope._input_tokens, scope._output_tokens,
+             cost, scope._tbt, scope._ttft)
+
+ def process_streaming_chat_response(scope, pricing_info, environment, application_name, metrics,
+     capture_message_content=False, disable_metrics=False, version=""):
+     """
+     Process streaming chat request and generate Telemetry
+     """
+
+     common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+         capture_message_content, disable_metrics, version, is_stream=True)
+
+ def process_chat_response(response, request_model, pricing_info, server_port, server_address,
+     environment, application_name, metrics, start_time, span, capture_message_content=False,
+     disable_metrics=False, version="1.0.0", **kwargs):
+     """
+     Process chat request and generate Telemetry
+     """
+
+     # Create scope object
+     scope = type("GenericScope", (), {})()
+     response_dict = response_as_dict(response)
+
+     scope._start_time = start_time
+     scope._end_time = time.time()
+     scope._span = span
+     # Extract response content - handle both text and tool responses
+     message = response_dict.get("message", {})
+     content_list = message.get("content", [])
+     if content_list and isinstance(content_list, list) and len(content_list) > 0:
+         scope._llmresponse = content_list[0].get("text", "")
+     else:
+         scope._llmresponse = ""
+     scope._response_id = response_dict.get("id")
+     scope._response_model = request_model
+     scope._input_tokens = response_dict.get("usage", {}).get("billed_units", {}).get("input_tokens", 0)
+     scope._output_tokens = response_dict.get("usage", {}).get("billed_units", {}).get("output_tokens", 0)
+     scope._timestamps = []
+     scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+     scope._server_address, scope._server_port = server_address, server_port
+     scope._kwargs = kwargs
+     scope._finish_reason = response_dict.get("finish_reason", "")
+
+     # Handle tool calls
+     if scope._kwargs.get("tools"):
+         scope._tools = response_dict.get("message", {}).get("tool_calls")
+         # Handle tool plan if present
+         scope._tool_plan = response_dict.get("message", {}).get("tool_plan", "")
+     else:
+         scope._tools = None
+         scope._tool_plan = ""
+
+     common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+         capture_message_content, disable_metrics, version, is_stream=False)
+
+     return response
+
+ def common_embedding_logic(scope, pricing_info, environment, application_name, metrics,
+     capture_message_content, disable_metrics, version):
+     """
+     Process embedding request and generate Telemetry
+     """
+
+     request_model = scope._kwargs.get("model", "embed-english-v3.0")
+     inputs = scope._kwargs.get("texts", [])
+
+     cost = get_embed_model_cost(request_model, pricing_info, scope._input_tokens)
+
+     # Common Span Attributes
+     common_span_attributes(scope,
+         SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING, SemanticConvention.GEN_AI_SYSTEM_COHERE,
+         scope._server_address, scope._server_port, request_model, scope._response_model,
+         environment, application_name, False, 0, scope._ttft, version)
+
+     # Span Attributes for Request parameters
+     scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_ENCODING_FORMATS, scope._kwargs.get("embedding_types", ["float"]))
+
+     # Span Attributes for Cost and Tokens
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+     scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, scope._response_type)
+
+     # Span Attributes for Content
+     if capture_message_content:
+         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, str(inputs))
+
+         # To be removed once the change to span_attributes (from span events) is complete
+         scope._span.add_event(
+             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+             attributes={
+                 SemanticConvention.GEN_AI_CONTENT_PROMPT: str(inputs),
+             },
+         )
+
+     scope._span.set_status(Status(StatusCode.OK))
+
+     # Metrics
+     if not disable_metrics:
+         record_embedding_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING, SemanticConvention.GEN_AI_SYSTEM_COHERE,
+             scope._server_address, scope._server_port, request_model, scope._response_model, environment,
+             application_name, scope._start_time, scope._end_time, scope._input_tokens, cost)
+
+ def process_embedding_response(response, request_model, pricing_info, server_port, server_address,
+     environment, application_name, metrics, start_time, span, capture_message_content=False,
+     disable_metrics=False, version="1.0.0", **kwargs):
+     """
+     Process embedding request and generate Telemetry
+     """
+
+     # Create scope object
+     scope = type("GenericScope", (), {})()
+     response_dict = response_as_dict(response)
+
+     scope._start_time = start_time
+     scope._end_time = time.time()
+     scope._span = span
+     scope._input_tokens = response_dict.get("meta", {}).get("billed_units", {}).get("input_tokens", 0)
+     scope._response_model = request_model
+     scope._response_type = response_dict.get("response_type", "")
+     scope._ttft = scope._end_time - scope._start_time
+     scope._server_address, scope._server_port = server_address, server_port
+     scope._kwargs = kwargs
+
+     common_embedding_logic(scope, pricing_info, environment, application_name, metrics,
+         capture_message_content, disable_metrics, version)
+
+     return response
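For orientation, here is a small, hedged sketch of how the new Cohere helpers behave. It is illustrative only: it assumes this module is importable as openlit.instrumentation.cohere.utils (the diff elides the file path), that response_as_dict passes plain dicts through unchanged, and it mimics the Cohere v2 streaming event shapes handled above with a made-up response id.

    import time
    from types import SimpleNamespace

    from openlit.instrumentation.cohere.utils import format_content, process_chunk

    # format_content flattens chat messages into "role: content" lines:
    # format_content([{"role": "user", "content": "Hello"}]) -> "user: Hello"

    # process_chunk accumulates streaming state on a bare scope object.
    scope = SimpleNamespace(_timestamps=[], _llmresponse="", _start_time=time.time())

    process_chunk(scope, {"type": "message-start", "id": "resp_123"})
    process_chunk(scope, {"type": "content-delta",
                          "delta": {"message": {"content": {"text": "Hello"}}}})
    process_chunk(scope, {"type": "message-end",
                          "delta": {"finish_reason": "COMPLETE",
                                    "usage": {"billed_units": {"input_tokens": 3,
                                                               "output_tokens": 2}}}})

    assert scope._llmresponse == "Hello"
    assert scope._finish_reason == "COMPLETE"

The scope object carries all per-request state, so the same common_chat_logic path can serve both the streaming and non-streaming code paths.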
@@ -1,80 +1,88 @@
- # pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
  """Initializer of Auto Instrumentation of Mistral Functions"""
+
  from typing import Collection
  import importlib.metadata
  from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
  from wrapt import wrap_function_wrapper

- from openlit.instrumentation.mistral.mistral import chat, chat_stream, embeddings
- from openlit.instrumentation.mistral.async_mistral import async_chat, async_chat_stream
- from openlit.instrumentation.mistral.async_mistral import async_embeddings
+ from openlit.instrumentation.mistral.mistral import (
+     complete,
+     stream,
+     embed
+ )
+ from openlit.instrumentation.mistral.async_mistral import (
+     async_complete,
+     async_stream,
+     async_embed
+ )

  _instruments = ("mistralai >= 1.0.0",)

  class MistralInstrumentor(BaseInstrumentor):
-     """An instrumentor for Mistral's client library."""
+     """
+     An instrumentor for Mistral client library.
+     """

      def instrumentation_dependencies(self) -> Collection[str]:
          return _instruments

      def _instrument(self, **kwargs):
-         application_name = kwargs.get("application_name")
-         environment = kwargs.get("environment")
+         application_name = kwargs.get("application_name", "default")
+         environment = kwargs.get("environment", "default")
          tracer = kwargs.get("tracer")
          metrics = kwargs.get("metrics_dict")
-         pricing_info = kwargs.get("pricing_info")
-         capture_message_content = kwargs.get("capture_message_content")
+         pricing_info = kwargs.get("pricing_info", {})
+         capture_message_content = kwargs.get("capture_message_content", False)
          disable_metrics = kwargs.get("disable_metrics")
          version = importlib.metadata.version("mistralai")

-         # sync
+         # sync chat completions
          wrap_function_wrapper(
-             "mistralai.chat",
-             "Chat.complete",
-             chat(version, environment, application_name,
-                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+             "mistralai.chat",
+             "Chat.complete",
+             complete(version, environment, application_name,
+                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
          )

-         # sync
+         # sync chat streaming
          wrap_function_wrapper(
-             "mistralai.chat",
-             "Chat.stream",
-             chat_stream(version, environment, application_name,
-                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+             "mistralai.chat",
+             "Chat.stream",
+             stream(version, environment, application_name,
+                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
          )

-         # sync
+         # sync embeddings
          wrap_function_wrapper(
-             "mistralai.embeddings",
-             "Embeddings.create",
-             embeddings(version, environment, application_name,
-                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+             "mistralai.embeddings",
+             "Embeddings.create",
+             embed(version, environment, application_name,
+                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
          )

-         # Async
+         # async chat completions
          wrap_function_wrapper(
-             "mistralai.chat",
-             "Chat.complete_async",
-             async_chat(version, environment, application_name,
-                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+             "mistralai.chat",
+             "Chat.complete_async",
+             async_complete(version, environment, application_name,
+                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
          )

-         # Async
+         # async chat streaming
          wrap_function_wrapper(
-             "mistralai.chat",
-             "Chat.stream_async",
-             async_chat_stream(version, environment, application_name,
-                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+             "mistralai.chat",
+             "Chat.stream_async",
+             async_stream(version, environment, application_name,
+                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
          )

-         #sync
+         # async embeddings
          wrap_function_wrapper(
-             "mistralai.embeddings",
-             "Embeddings.create_async",
-             async_embeddings(version, environment, application_name,
-                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+             "mistralai.embeddings",
+             "Embeddings.create_async",
+             async_embed(version, environment, application_name,
+                 tracer, pricing_info, capture_message_content, metrics, disable_metrics),
          )

-     @staticmethod
      def _uninstrument(self, **kwargs):
          pass
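The renamed wrappers attach to the mistralai v1 client surface named in the hunk above (Chat.complete, Chat.stream, Embeddings.create and their _async variants). A hedged usage sketch under that assumption; once openlit.init() has run, these calls are traced without any application code changes (the key and model names are placeholders):

    from mistralai import Mistral

    client = Mistral(api_key="...")  # placeholder key

    # Traced by the `complete` wrapper registered above.
    response = client.chat.complete(
        model="mistral-small-latest",
        messages=[{"role": "user", "content": "Hello"}],
    )

    # Traced by the `embed` wrapper registered above.
    embeddings = client.embeddings.create(
        model="mistral-embed",
        inputs=["Hello"],
    )

Renaming the wrappers (chat -> complete, embeddings -> embed, and so on) mirrors the names of the client methods they wrap, which keeps the instrumentation module aligned with the mistralai v1 API.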