openlit 1.34.14__py3-none-any.whl → 1.34.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,104 +3,185 @@ Azure AI Inference OpenTelemetry instrumentation utility functions
  """
  import time

- from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
  from opentelemetry.trace import Status, StatusCode

  from openlit.__helpers import (
  calculate_ttft,
  response_as_dict,
  calculate_tbt,
- extract_and_format_input,
  get_chat_model_cost,
- create_metrics_attributes,
- otel_event,
- concatenate_all_contents
+ get_embed_model_cost,
+ common_span_attributes,
+ record_completion_metrics,
+ record_embedding_metrics,
  )
  from openlit.semcov import SemanticConvention

- def process_chunk(self, chunk):
+ def format_content(messages):
+ """
+ Process a list of messages to extract content.
+ """
+
+ formatted_messages = []
+ for message in messages:
+ role = message.get("role", "user")
+ content = message.get("content", "")
+
+ if isinstance(content, list):
+ content_str = ", ".join(
+ f'{item["type"]}: {item["text"] if "text" in item else item.get("image_url", "")}'
+ if "type" in item else f'text: {item.get("text", "")}'
+ for item in content
+ )
+ formatted_messages.append(f'{role}: {content_str}')
+ else:
+ formatted_messages.append(f'{role}: {content}')
+
+ return '\n'.join(formatted_messages)
+
+ def process_chunk(scope, chunk):
  """
  Process a chunk of response data and update state.
  """

  end_time = time.time()
  # Record the timestamp for the current chunk
- self._timestamps.append(end_time)
+ scope._timestamps.append(end_time)

- if len(self._timestamps) == 1:
+ if len(scope._timestamps) == 1:
  # Calculate time to first chunk
- self._ttft = calculate_ttft(self._timestamps, self._start_time)
+ scope._ttft = calculate_ttft(scope._timestamps, scope._start_time)

  chunked = response_as_dict(chunk)

  # Collect message IDs and aggregated response from events
- if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
- 'content' in chunked.get('choices')[0].get('delta'))):
+ choices = chunked.get("choices", [])
+ if choices and "delta" in choices[0]:
+ delta = choices[0]["delta"]
+
+ # Handle content
+ content = delta.get("content")
+ if content:
+ scope._llmresponse += content
+
+ # Handle reasoning content (if present)
+ reasoning_content = delta.get("reasoning_content")
+ if reasoning_content:
+ if not hasattr(scope, "_reasoning_content"):
+ scope._reasoning_content = ""
+ scope._reasoning_content += reasoning_content

- if content := chunked.get('choices')[0].get('delta').get('content'):
- self._llmresponse += content
+ # Handle finish_reason (appears in final chunk)
+ finish_reason = chunked.get("choices")[0].get("finish_reason")
+ if finish_reason:
+ scope._finish_reason = finish_reason
+ scope._end_time = time.time()

- if chunked.get('choices')[0].get('finish_reason') is not None:
- self._finish_reason = chunked.get('choices')[0].get('finish_reason')
+ # Handle tool calls in streaming - optimized
+ delta_tools = delta.get("tool_calls")
+ if delta_tools:
+ scope._tools = scope._tools or []

- if chunked.get('usage') is not None:
- self._input_tokens = chunked.get('usage').get('prompt_tokens')
- self._response_id = chunked.get('id')
- self._response_model = chunked.get('model')
- self._output_tokens = chunked.get('usage').get('completion_tokens')
+ for tool in delta_tools:
+ idx = tool.get("index", 0)
+
+ # Extend list if needed
+ scope._tools.extend([{}] * (idx + 1 - len(scope._tools)))
+
+ if tool.get("id"): # New tool (id exists)
+ func = tool.get("function", {})
+ scope._tools[idx] = {
+ "id": tool["id"],
+ "function": {"name": func.get("name", ""), "arguments": func.get("arguments", "")},
+ "type": tool.get("type", "function")
+ }
+ elif scope._tools[idx] and "function" in tool: # Append args (id is None)
+ scope._tools[idx]["function"]["arguments"] += tool["function"].get("arguments", "")
+
+ # Handle usage information (typically only in final chunk)
+ if chunked.get("usage"):
+ scope._input_tokens = chunked.get("usage").get("prompt_tokens", 0)
+ scope._output_tokens = chunked.get("usage").get("completion_tokens", 0)
+ # Handle reasoning tokens if present (optional) - check nested structure
+ completion_details = chunked.get("usage", {}).get("completion_tokens_details", {})
+ if "reasoning_tokens" in completion_details:
+ scope._reasoning_tokens = completion_details.get("reasoning_tokens", 0)
+ elif "reasoning_tokens" in chunked.get("usage", {}):
+ scope._reasoning_tokens = chunked.get("usage").get("reasoning_tokens", 0)

  def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
- event_provider, capture_message_content, disable_metrics, version, is_stream):
+ capture_message_content, disable_metrics, version, is_stream):
  """
  Process chat request and generate Telemetry
  """

- scope._end_time = time.time()
  if len(scope._timestamps) > 1:
  scope._tbt = calculate_tbt(scope._timestamps)

- formatted_messages = extract_and_format_input(scope._kwargs.get('messages', ''))
- request_model = scope._kwargs.get('model', 'claude-3-opus-20240229')
+ prompt = format_content(scope._kwargs.get("messages", []))
+ request_model = scope._kwargs.get("model", "gpt-4o")

  cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)

- # Set Span attributes (OTel Semconv)
- scope._span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
- scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
- scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_AZURE_AI_INFERENCE)
- scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
- scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
- scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, scope._kwargs.get('max_tokens', -1))
- scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, scope._kwargs.get('stop', []))
- scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get('temperature', 1.0))
- scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K, scope._kwargs.get('top_k', 1.0))
- scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, scope._kwargs.get('top_p', 1.0))
- scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
- scope._kwargs.get('frequency_penalty', 0.0))
- scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
- scope._kwargs.get('presence_penalty', 0.0))
- scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+ # Common Span Attributes
+ common_span_attributes(scope,
+ SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
+ scope._server_address, scope._server_port, request_model, scope._response_model,
+ environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+ # Span Attributes for Request parameters
+ scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, scope._kwargs.get("frequency_penalty", 0.0))
+ scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, scope._kwargs.get("max_tokens", -1))
+ scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, scope._kwargs.get("presence_penalty", 0.0))
+ scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, scope._kwargs.get("stop", []))
+ scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get("temperature", 1.0))
+ scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, scope._kwargs.get("top_p", 1.0))
+
+ # Span Attributes for Response parameters
  scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
- scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._response_model)
+ scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+ scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SERVICE_TIER, scope._response_service_tier)
+ scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT, scope._response_service_tier)
+ scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")
+
+ # Span Attributes for Cost and Tokens
  scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
  scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
- scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
-
- scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
- 'text' if isinstance(scope._llmresponse, str) else 'json')
-
- scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
- scope._span.set_attribute(SERVICE_NAME, application_name)
- scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
  scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
  scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
- scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
- scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
- scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)

- # To be removed one the change to log events (from span events) is complete
- prompt = concatenate_all_contents(formatted_messages)
+ # Span Attributes for Reasoning (if present)
+ if hasattr(scope, "_reasoning_tokens") and scope._reasoning_tokens > 0:
+ scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_REASONING_TOKENS, scope._reasoning_tokens)
+ # Update total token usage to include reasoning tokens
+ scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
+ scope._input_tokens + scope._output_tokens + scope._reasoning_tokens)
+
+ # Span Attributes for Tools - optimized
+ if scope._tools:
+ tools = scope._tools if isinstance(scope._tools, list) else [scope._tools]
+
+ names, ids, args = zip(*[
+ (t.get("function", {}).get("name", ""),
+ str(t.get("id", "")),
+ str(t.get("function", {}).get("arguments", "")))
+ for t in tools if isinstance(t, dict) and t
+ ]) if tools else ([], [], [])
+
+ scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_NAME, ", ".join(filter(None, names)))
+ scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALL_ID, ", ".join(filter(None, ids)))
+ scope._span.set_attribute(SemanticConvention.GEN_AI_TOOL_ARGS, ", ".join(filter(None, args)))
+
+ # Span Attributes for Content
  if capture_message_content:
+ scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+ scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+
+ # Add reasoning content if available
+ if hasattr(scope, "_reasoning_content") and scope._reasoning_content:
+ scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_REASONING, scope._reasoning_content)
+
+ # To be removed once the change to span_attributes (from span events) is complete
  scope._span.add_event(
  name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
  attributes={
@@ -114,112 +195,143 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
  },
  )

- choice_event_body = {
- 'finish_reason': scope._finish_reason,
- 'index': 0,
- 'message': {
- **({'content': scope._llmresponse} if capture_message_content else {}),
- 'role': 'assistant'
- }
- }
-
- # Emit events
- for role in ['user', 'system', 'assistant', 'tool']:
- if formatted_messages.get(role, {}).get('content', ''):
- event = otel_event(
- name=getattr(SemanticConvention, f'GEN_AI_{role.upper()}_MESSAGE'),
- attributes={
- SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_AZURE_AI_INFERENCE
- },
- body = {
- # pylint: disable=line-too-long
- **({'content': formatted_messages.get(role, {}).get('content', '')} if capture_message_content else {}),
- 'role': formatted_messages.get(role, {}).get('role', []),
- **({
- 'tool_calls': {
- 'function': {
- # pylint: disable=line-too-long
- 'name': (scope._tool_calls[0].get('function', {}).get('name', '') if scope._tool_calls else ''),
- 'arguments': (scope._tool_calls[0].get('function', {}).get('arguments', '') if scope._tool_calls else '')
- },
- 'id': (scope._tool_calls[0].get('id', '') if scope._tool_calls else ''),
- 'type': 'function'
- }
- } if role == 'assistant' else {}),
- **({
- 'id': (scope._tool_calls[0].get('id', '') if scope._tool_calls else '')
- } if role == 'tool' else {})
- }
- )
- event_provider.emit(event)
-
- choice_event = otel_event(
- name=SemanticConvention.GEN_AI_CHOICE,
- attributes={
- SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_AZURE_AI_INFERENCE
- },
- body=choice_event_body
- )
- event_provider.emit(choice_event)
-
  scope._span.set_status(Status(StatusCode.OK))

+ # Metrics
  if not disable_metrics:
- metrics_attributes = create_metrics_attributes(
- service_name=application_name,
- deployment_environment=environment,
- operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
- system=SemanticConvention.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
- request_model=request_model,
- server_address=scope._server_address,
- server_port=scope._server_port,
- response_model=scope._response_model,
- )
+ record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+ SemanticConvention.GEN_AI_SYSTEM_AZURE_AI_INFERENCE, scope._server_address, scope._server_port,
+ request_model, scope._response_model, environment, application_name, scope._start_time, scope._end_time,
+ scope._input_tokens, scope._output_tokens, cost, scope._tbt, scope._ttft)

- metrics['genai_client_usage_tokens'].record(scope._input_tokens + scope._output_tokens, metrics_attributes)
- metrics['genai_client_operation_duration'].record(scope._end_time - scope._start_time, metrics_attributes)
- metrics['genai_server_tbt'].record(scope._tbt, metrics_attributes)
- metrics['genai_server_ttft'].record(scope._ttft, metrics_attributes)
- metrics['genai_requests'].add(1, metrics_attributes)
- metrics['genai_completion_tokens'].add(scope._output_tokens, metrics_attributes)
- metrics['genai_prompt_tokens'].add(scope._input_tokens, metrics_attributes)
- metrics['genai_cost'].record(cost, metrics_attributes)
-
- def process_streaming_chat_response(self, pricing_info, environment, application_name, metrics,
- event_provider, capture_message_content=False, disable_metrics=False, version=''):
+ def process_streaming_chat_response(scope, pricing_info, environment, application_name, metrics,
+ capture_message_content=False, disable_metrics=False, version=""):
  """
- Process chat request and generate Telemetry
+ Process streaming chat request and generate Telemetry
  """

- common_chat_logic(self, pricing_info, environment, application_name, metrics,
- event_provider, capture_message_content, disable_metrics, version, is_stream=True)
+ common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+ capture_message_content, disable_metrics, version, is_stream=True)

  def process_chat_response(response, request_model, pricing_info, server_port, server_address,
- environment, application_name, metrics, event_provider, start_time,
- span, capture_message_content=False, disable_metrics=False, version='1.0.0', **kwargs):
+ environment, application_name, metrics, start_time, span, capture_message_content=False,
+ disable_metrics=False, version="1.0.0", **kwargs):
  """
  Process chat request and generate Telemetry
  """

- self = type('GenericScope', (), {})()
+ # Create scope object
+ scope = type("GenericScope", (), {})()
+ response_dict = response_as_dict(response)
+
+ scope._start_time = start_time
+ scope._end_time = time.time()
+ scope._span = span
+ scope._llmresponse = " ".join(
+ (choice.get("message", {}).get("content") or "")
+ for choice in response_dict.get("choices", [])
+ )
+ # Handle reasoning content from non-streaming response
+ reasoning_content = response_dict.get("choices", [{}])[0].get("message", {}).get("reasoning_content")
+ if reasoning_content:
+ scope._reasoning_content = reasoning_content
+
+ scope._input_tokens = response_dict.get("usage", {}).get("prompt_tokens", 0)
+ scope._output_tokens = response_dict.get("usage", {}).get("completion_tokens", 0)
+ # Handle reasoning tokens if present (optional) - check nested structure
+ completion_details = response_dict.get("usage", {}).get("completion_tokens_details", {})
+ if "reasoning_tokens" in completion_details:
+ scope._reasoning_tokens = completion_details.get("reasoning_tokens", 0)
+ elif "reasoning_tokens" in response_dict.get("usage", {}):
+ scope._reasoning_tokens = response_dict.get("usage").get("reasoning_tokens", 0)
+ else:
+ scope._reasoning_tokens = 0
+ scope._response_id = response_dict.get("id")
+ scope._response_model = response_dict.get("model")
+ scope._finish_reason = str(response_dict.get("choices", [])[0].get("finish_reason", ""))
+ scope._response_service_tier = str(response_dict.get("system_fingerprint", ""))
+ scope._timestamps = []
+ scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+ scope._server_address, scope._server_port = server_address, server_port
+ scope._kwargs = kwargs
+
+ # Handle tool calls
+ if scope._kwargs.get("tools"):
+ scope._tools = response_dict.get("choices", [{}])[0].get("message", {}).get("tool_calls")
+ else:
+ scope._tools = None
+
+ common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+ capture_message_content, disable_metrics, version, is_stream=False)
+
+ return response
+
+ def common_embedding_logic(scope, pricing_info, environment, application_name, metrics,
+ capture_message_content, disable_metrics, version):
+ """
+ Process embedding request and generate Telemetry
+ """
+
+ request_model = scope._kwargs.get("model", "text-embedding-3-small")
+
+ cost = get_embed_model_cost(request_model, pricing_info, scope._input_tokens)
+
+ # Common Span Attributes
+ common_span_attributes(scope,
+ SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING, SemanticConvention.GEN_AI_SYSTEM_AZURE_AI_INFERENCE,
+ scope._server_address, scope._server_port, request_model, scope._response_model,
+ environment, application_name, False, 0, scope._end_time - scope._start_time, version)
+
+ # Span Attributes for Request parameters
+ scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_ENCODING_FORMATS, [scope._kwargs.get("encoding_format", "float")])
+ scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER, scope._kwargs.get("user", ""))
+
+ # Span Attributes for Cost and Tokens
+ scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
+ scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens)
+ scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+
+ # Span Attributes for Content
+ if capture_message_content:
+ scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, str(scope._kwargs.get("input", "")))
+
+ # To be removed once the change to span_attributes (from span events) is complete
+ scope._span.add_event(
+ name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+ attributes={
+ SemanticConvention.GEN_AI_CONTENT_PROMPT: str(scope._kwargs.get("input", "")),
+ },
+ )
+
+ scope._span.set_status(Status(StatusCode.OK))
+
+ # Metrics
+ if not disable_metrics:
+ record_embedding_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
+ SemanticConvention.GEN_AI_SYSTEM_AZURE_AI_INFERENCE, scope._server_address, scope._server_port,
+ request_model, scope._response_model, environment, application_name, scope._start_time, scope._end_time,
+ scope._input_tokens, cost)
+
+ def process_embedding_response(response, request_model, pricing_info, server_port, server_address,
+ environment, application_name, metrics, start_time, span, capture_message_content=False,
+ disable_metrics=False, version="1.0.0", **kwargs):
+ """
+ Process embedding request and generate Telemetry
+ """
+
+ # Create scope object
+ scope = type("GenericScope", (), {})()
  response_dict = response_as_dict(response)

- # pylint: disable = no-member
- self._start_time = start_time
- self._end_time = time.time()
- self._span = span
- self._llmresponse = response_dict.get('choices', {})[0].get('message', '').get('content', '')
- self._input_tokens = response_dict.get('usage').get('prompt_tokens')
- self._output_tokens = response_dict.get('usage').get('completion_tokens')
- self._response_model = response_dict.get('model', '')
- self._finish_reason = response_dict.get('choices', {})[0].get('finish_reason', '')
- self._response_id = response_dict.get('id', '')
- self._timestamps = []
- self._ttft, self._tbt = self._end_time - self._start_time, 0
- self._server_address, self._server_port = server_address, server_port
- self._kwargs = kwargs
-
- common_chat_logic(self, pricing_info, environment, application_name, metrics,
- event_provider, capture_message_content, disable_metrics, version, is_stream=False)
+ scope._start_time = start_time
+ scope._end_time = time.time()
+ scope._span = span
+ scope._input_tokens = response_dict.get("usage", {}).get("prompt_tokens", 0)
+ scope._response_model = response_dict.get("model")
+ scope._server_address, scope._server_port = server_address, server_port
+ scope._kwargs = kwargs
+
+ common_embedding_logic(scope, pricing_info, environment, application_name, metrics,
+ capture_message_content, disable_metrics, version)

  return response
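
Editor's note: for orientation, here is a small, hypothetical sketch (not part of either package version) showing what the new format_content helper added above produces for a typical messages list. The import path is an assumption based on the hunk header (Azure AI Inference instrumentation utilities); the sample messages and printed output are illustrative only.

    # Hedged sketch: exercising the format_content helper from the diff above.
    # The module path below is assumed, not stated in the diff.
    from openlit.instrumentation.azure_ai_inference.utils import format_content

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": [
            {"type": "text", "text": "Describe this image"},
            {"type": "image_url", "image_url": "https://example.com/cat.png"},
        ]},
    ]

    # Each message is rendered as "role: content" on its own line; list-style
    # content is flattened into "type: value" pairs joined by ", ".
    print(format_content(messages))
    # system: You are a helpful assistant.
    # user: text: Describe this image, image_url: https://example.com/cat.png

The remaining hunks below are from the LiteLLM auto-instrumentation initializer (per its module docstring).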
@@ -1,4 +1,3 @@
- # pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
  """Initializer of Auto Instrumentation of LiteLLM Functions"""

  from typing import Collection
@@ -17,15 +16,15 @@ _instruments = ("litellm >= 1.52.6",)

  class LiteLLMInstrumentor(BaseInstrumentor):
  """
- An instrumentor for LiteLLM's client library.
+ An instrumentor for LiteLLM client library.
  """

  def instrumentation_dependencies(self) -> Collection[str]:
  return _instruments

  def _instrument(self, **kwargs):
- application_name = kwargs.get("application_name", "default_application")
- environment = kwargs.get("environment", "default_environment")
+ application_name = kwargs.get("application_name", "default")
+ environment = kwargs.get("environment", "default")
  tracer = kwargs.get("tracer")
  metrics = kwargs.get("metrics_dict")
  pricing_info = kwargs.get("pricing_info", {})
@@ -33,7 +32,7 @@ class LiteLLMInstrumentor(BaseInstrumentor):
  disable_metrics = kwargs.get("disable_metrics")
  version = importlib.metadata.version("litellm")

- # completion
+ # Chat completions
  wrap_function_wrapper(
  "litellm",
  "completion",
@@ -41,6 +40,7 @@ class LiteLLMInstrumentor(BaseInstrumentor):
  tracer, pricing_info, capture_message_content, metrics, disable_metrics),
  )

+ # Async chat completions
  wrap_function_wrapper(
  "litellm",
  "acompletion",
@@ -48,6 +48,7 @@ class LiteLLMInstrumentor(BaseInstrumentor):
  tracer, pricing_info, capture_message_content, metrics, disable_metrics),
  )

+ # Embeddings
  wrap_function_wrapper(
  "litellm",
  "embedding",
@@ -55,6 +56,7 @@ class LiteLLMInstrumentor(BaseInstrumentor):
  tracer, pricing_info, capture_message_content, metrics, disable_metrics),
  )

+ # Async embeddings
  wrap_function_wrapper(
  "litellm",
  "aembedding",
@@ -63,5 +65,4 @@ class LiteLLMInstrumentor(BaseInstrumentor):
  )

  def _uninstrument(self, **kwargs):
- # Proper uninstrumentation logic to revert patched methods
  pass
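
Editor's note: as a rough, hypothetical usage sketch (not taken from the package), the instrumentor above could be activated manually as shown below. BaseInstrumentor.instrument() forwards keyword arguments to _instrument(), and the keyword names mirror those read in the diff; in normal use openlit.init() performs this wiring itself, and the tracer setup here is a generic OpenTelemetry example.

    # Hypothetical manual activation; openlit.init() normally handles this internally.
    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider

    trace.set_tracer_provider(TracerProvider())

    LiteLLMInstrumentor().instrument(
        application_name="default",        # default value shown in the diff
        environment="default",
        tracer=trace.get_tracer(__name__),
        metrics_dict=None,                 # would be openlit's metrics dictionary
        pricing_info={},
        capture_message_content=False,
        disable_metrics=True,              # no metrics_dict supplied in this sketch
    )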