openlit 1.34.13__py3-none-any.whl → 1.34.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,57 +1,37 @@
 """
-Module for monitoring LiteLLM calls.
+Module for monitoring LiteLLM API calls.
 """

-import logging
 import time
-from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
-    get_chat_model_cost,
-    get_embed_model_cost,
-    general_tokens,
     handle_exception,
-    response_as_dict,
-    calculate_ttft,
-    calculate_tbt,
-    create_metrics_attributes,
+    set_server_address_and_port
+)
+from openlit.instrumentation.litellm.utils import (
+    process_chunk,
+    process_streaming_chat_response,
+    process_chat_response,
+    process_embedding_response
 )
 from openlit.semcov import SemanticConvention

-# Initialize logger for logging potential issues and operations
-logger = logging.getLogger(__name__)
-
-def completion(version, environment, application_name,
-    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+def completion(version, environment, application_name, tracer, pricing_info,
+    capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for chat completions to collect metrics.
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the LiteLLM SDK.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of LiteLLM usage.
-        capture_message_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the chat completions method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """

     class TracedSyncStream:
         """
-        Wrapper for streaming responses to collect metrics and trace data.
-
-        This class implements the '__aiter__' and '__anext__' methods that
-        handle asynchronous streaming responses.
-
-        This class also implements '__aenter__' and '__aexit__' methods that
-        handle asynchronous context management protocol.
+        Wrapper for streaming responses to collect telemetry.
         """
+
         def __init__(
                 self,
                 wrapped,
                 span,
+                span_name,
                 kwargs,
                 server_address,
                 server_port,
@@ -59,12 +39,15 @@ def completion(version, environment, application_name,
             ):
             self.__wrapped__ = wrapped
             self._span = span
-            self._llmresponse = ''
-            self._response_id = ''
-            self._response_model = ''
-            self._finish_reason = ''
-            self._response_service_tier = ''
-
+            self._span_name = span_name
+            self._llmresponse = ""
+            self._response_id = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._response_service_tier = ""
+            self._tools = None
+            self._input_tokens = 0
+            self._output_tokens = 0
             self._args = args
             self._kwargs = kwargs
             self._start_time = time.time()
@@ -92,501 +75,114 @@ def completion(version, environment, application_name,
         def __next__(self):
             try:
                 chunk = self.__wrapped__.__next__()
-                end_time = time.time()
-                # Record the timestamp for the current chunk
-                self._timestamps.append(end_time)
-
-                if len(self._timestamps) == 1:
-                    # Calculate time to first chunk
-                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
-
-                chunked = response_as_dict(chunk)
-                # Collect message IDs and aggregated response from events
-                if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
-                    'content' in chunked.get('choices')[0].get('delta'))):
-
-                    content = chunked.get('choices')[0].get('delta').get('content')
-                    if content:
-                        self._llmresponse += content
-                self._response_id = chunked.get('id')
-                self._response_model = chunked.get('model')
-                self._finish_reason = chunked.get('choices')[0].get('finish_reason')
-                self._response_service_tier = str(chunked.get('system_fingerprint'))
+                process_chunk(self, chunk)
                 return chunk
             except StopIteration:
-                # Handling exception ensure observability without disrupting operation
                 try:
-                    self._end_time = time.time()
-                    if len(self._timestamps) > 1:
-                        self._tbt = calculate_tbt(self._timestamps)
-
-                    # Format 'messages' into a single string
-                    message_prompt = self._kwargs.get('messages', '')
-                    formatted_messages = []
-                    for message in message_prompt:
-                        role = message['role']
-                        content = message['content']
-
-                        if isinstance(content, list):
-                            content_str = ", ".join(
-                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                if "type" in item else f'text: {item["text"]}'
-                                for item in content
-                            )
-                            formatted_messages.append(f'{role}: {content_str}')
-                        else:
-                            formatted_messages.append(f'{role}: {content}')
-                    prompt = '\n'.join(formatted_messages)
-
-                    request_model = self._kwargs.get('model', 'openai/gpt-4o')
-
-                    # Calculate tokens using input prompt and aggregated response
-                    input_tokens = general_tokens(prompt)
-                    output_tokens = general_tokens(self._llmresponse)
-
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(request_model,
-                        pricing_info, input_tokens,
-                        output_tokens)
-
-                    # Set Span attributes (OTel Semconv)
-                    self._span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
-                    self._span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                        SemanticConvention.GEN_AI_SYSTEM_LITELLM)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                        request_model)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED,
-                        self._kwargs.get('seed', ''))
-                    self._span.set_attribute(SemanticConvention.SERVER_PORT,
-                        self._server_port)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                        self._kwargs.get('frequency_penalty', 0.0))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
-                        self._kwargs.get('max_tokens', -1))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                        self._kwargs.get('presence_penalty', 0.0))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
-                        self._kwargs.get('stop', []))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-                        self._kwargs.get('temperature', 1.0))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
-                        self._kwargs.get('top_p', 1.0))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
-                        [self._finish_reason])
-                    self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
-                        self._response_id)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                        self._response_model)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                        input_tokens)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                        output_tokens)
-                    self._span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                        self._server_address)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SERVICE_TIER,
-                        self._kwargs.get('service_tier', 'auto'))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SERVICE_TIER,
-                        self._response_service_tier)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
-                        self._response_service_tier)
-                    if isinstance(self._llmresponse, str):
-                        self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                            'text')
-                    else:
-                        self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                            'json')
-
-                    # Set Span attributes (Extra)
-                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                        environment)
-                    self._span.set_attribute(SERVICE_NAME,
-                        application_name)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
-                        self._kwargs.get('user', ''))
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                        True)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                        input_tokens + output_tokens)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                        cost)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
-                        self._tbt)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                        self._ttft)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                        version)
-                    if capture_message_content:
-                        self._span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        self._span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
-                            },
+                    with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
+                        process_streaming_chat_response(
+                            self,
+                            pricing_info=pricing_info,
+                            environment=environment,
+                            application_name=application_name,
+                            metrics=metrics,
+                            capture_message_content=capture_message_content,
+                            disable_metrics=disable_metrics,
+                            version=version
                         )
-                    self._span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = create_metrics_attributes(
-                            service_name=application_name,
-                            deployment_environment=environment,
-                            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                            system=SemanticConvention.GEN_AI_SYSTEM_LITELLM,
-                            request_model=request_model,
-                            server_address=self._server_address,
-                            server_port=self._server_port,
-                            response_model=self._response_model,
-                        )
-
-                        metrics['genai_client_usage_tokens'].record(
-                            input_tokens + output_tokens, attributes
-                        )
-                        metrics['genai_client_operation_duration'].record(
-                            self._end_time - self._start_time, attributes
-                        )
-                        metrics['genai_server_tbt'].record(
-                            self._tbt, attributes
-                        )
-                        metrics['genai_server_ttft'].record(
-                            self._ttft, attributes
-                        )
-                        metrics['genai_requests'].add(1, attributes)
-                        metrics['genai_completion_tokens'].add(output_tokens, attributes)
-                        metrics['genai_prompt_tokens'].add(input_tokens, attributes)
-                        metrics['genai_cost'].record(cost, attributes)

                 except Exception as e:
                     handle_exception(self._span, e)
-                    logger.error('Error in trace creation: %s', e)
-                finally:
-                    self._span.end()
+
                 raise

     def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'chat.completions' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'chat.completions' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'chat.completions' method.
-            kwargs: Keyword arguments for the 'chat.completions' method.
-
-        Returns:
-            The response from the original 'chat.completions' method.
+        Wraps the GenAI function call.
         """
-
         # Check if streaming is enabled for the API call
-        streaming = kwargs.get('stream', False)
-        server_address, server_port = 'NOT_FOUND', 'NOT_FOUND'
-        request_model = kwargs.get('model', 'openai/gpt-4o')
+        streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "NOT_FOUND", "NOT_FOUND")
+        request_model = kwargs.get("model", "openai/gpt-4o")

-        span_name = f'{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}'
+        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

-        # pylint: disable=no-else-return
         if streaming:
-            # Special handling for streaming response to accommodate the nature of data flow
+            # Special handling for streaming response
             awaited_wrapped = wrapped(*args, **kwargs)
             span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
-
-            return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
-
-        # Handling for non-streaming responses
-        # Handling for non-streaming responses
+            return TracedSyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
         else:
-            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            # Handling for non-streaming responses
+            with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
                 start_time = time.time()
                 response = wrapped(*args, **kwargs)
-                end_time = time.time()
-
-                response_dict = response_as_dict(response)

                 try:
-                    # Format 'messages' into a single string
-                    message_prompt = kwargs.get('messages', '')
-                    formatted_messages = []
-                    for message in message_prompt:
-                        role = message['role']
-                        content = message['content']
-
-                        if isinstance(content, list):
-                            content_str = ", ".join(
-                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                if "type" in item else f'text: {item["text"]}'
-                                for item in content
-                            )
-                            formatted_messages.append(f'{role}: {content_str}')
-                        else:
-                            formatted_messages.append(f'{role}: {content}')
-                    prompt = '\n'.join(formatted_messages)
-
-                    input_tokens = response_dict.get('usage').get('prompt_tokens')
-                    output_tokens = response_dict.get('usage').get('completion_tokens')
-
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(request_model,
-                        pricing_info, input_tokens,
-                        output_tokens)
-
-                    # Set base span attribues (OTel Semconv)
-                    span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
-                    span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                        SemanticConvention.GEN_AI_SYSTEM_LITELLM)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                        request_model)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED,
-                        kwargs.get('seed', ''))
-                    span.set_attribute(SemanticConvention.SERVER_PORT,
-                        server_port)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                        kwargs.get('frequency_penalty', 0.0))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
-                        kwargs.get('max_tokens', -1))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                        kwargs.get('presence_penalty', 0.0))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
-                        kwargs.get('stop', []))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-                        kwargs.get('temperature', 1.0))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
-                        kwargs.get('top_p', 1.0))
-                    span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
-                        response_dict.get('id'))
-                    span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                        response_dict.get('model'))
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                        input_tokens)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                        output_tokens)
-                    span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                        server_address)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SERVICE_TIER,
-                        kwargs.get('service_tier', 'auto'))
-                    span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
-                        str(response_dict.get('system_fingerprint')))
-
-                    # Set base span attribues (Extras)
-                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                        environment)
-                    span.set_attribute(SERVICE_NAME,
-                        application_name)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
-                        kwargs.get('user', ''))
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                        False)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                        input_tokens + output_tokens)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                        cost)
-                    span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                        end_time - start_time)
-                    span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                        version)
-                    if capture_message_content:
-                        span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-
-                    for i in range(kwargs.get('n',1)):
-                        span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
-                            [response_dict.get('choices')[i].get('finish_reason')])
-                        if capture_message_content:
-                            span.add_event(
-                                name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    # pylint: disable=line-too-long
-                                    SemanticConvention.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
-                                },
-                            )
-                        if kwargs.get('tools'):
-                            span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALLS,
-                                str(response_dict.get('choices')[i].get('message').get('tool_calls')))
-
-                        if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
-                            span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                                'text')
-                        elif response_dict.get('choices')[i].get('message').get('content') is not None:
-                            span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                                'json')
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = create_metrics_attributes(
-                            service_name=application_name,
-                            deployment_environment=environment,
-                            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                            system=SemanticConvention.GEN_AI_SYSTEM_LITELLM,
-                            request_model=request_model,
-                            server_address=server_address,
-                            server_port=server_port,
-                            response_model=response_dict.get('model'),
-                        )
-
-                        metrics['genai_client_usage_tokens'].record(
-                            input_tokens + output_tokens, attributes
-                        )
-                        metrics['genai_client_operation_duration'].record(
-                            end_time - start_time, attributes
-                        )
-                        metrics['genai_server_ttft'].record(
-                            end_time - start_time, attributes
-                        )
-                        metrics['genai_requests'].add(1, attributes)
-                        metrics['genai_completion_tokens'].add(output_tokens, attributes)
-                        metrics['genai_prompt_tokens'].add(input_tokens, attributes)
-                        metrics['genai_cost'].record(cost, attributes)
-
-                    # Return original response
-                    return response
+                    response = process_chat_response(
+                        response=response,
+                        request_model=request_model,
+                        pricing_info=pricing_info,
+                        server_port=server_port,
+                        server_address=server_address,
+                        environment=environment,
+                        application_name=application_name,
+                        metrics=metrics,
+                        start_time=start_time,
+                        span=span,
+                        capture_message_content=capture_message_content,
+                        disable_metrics=disable_metrics,
+                        version=version,
+                        **kwargs
+                    )

                 except Exception as e:
                     handle_exception(span, e)
-                    logger.error('Error in trace creation: %s', e)

-                    # Return original response
-                    return response
+                return response

     return wrapper

-def embedding(version, environment, application_name,
-    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+def embedding(version, environment, application_name, tracer, pricing_info,
+    capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for embeddings to collect metrics.
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the LiteLLM API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of LiteLLM usage.
-        capture_message_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the embeddings method to add telemetry.
+    Generates a telemetry wrapper for GenAI embedding function call
     """

     def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'embeddings' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'embeddings' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'embeddings' method.
-            kwargs: Keyword arguments for the 'embeddings' method.
-
-        Returns:
-            The response from the original 'embeddings' method.
+        Wraps the GenAI embedding function call.
         """
+        server_address, server_port = set_server_address_and_port(instance, "NOT_FOUND", "NOT_FOUND")
+        request_model = kwargs.get("model", "text-embedding-ada-002")

-        server_address, server_port = 'NOT_FOUND', 'NOT_FOUND'
-        request_model = kwargs.get('model', 'text-embedding-ada-002')
-
-        span_name = f'{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
+        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"

-        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
             start_time = time.time()
             response = wrapped(*args, **kwargs)
-            end_time = time.time()

-            response_dict = response_as_dict(response)
             try:
-                input_tokens = response_dict.get('usage').get('prompt_tokens')
-
-                # Calculate cost of the operation
-                cost = get_embed_model_cost(request_model,
-                    pricing_info, input_tokens)
-
-                # Set Span attributes (OTel Semconv)
-                span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
-                span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                    SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                    SemanticConvention.GEN_AI_SYSTEM_LITELLM)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                    request_model)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_ENCODING_FORMATS,
-                    [kwargs.get('encoding_format', 'float')])
-                span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                    response_dict.get('model'))
-                span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                    server_address)
-                span.set_attribute(SemanticConvention.SERVER_PORT,
-                    server_port)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                    input_tokens)
-
-                # Set Span attributes (Extras)
-                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                    environment)
-                span.set_attribute(SERVICE_NAME,
-                    application_name)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
-                    kwargs.get('user', ''))
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                    input_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                    cost)
-                span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                    version)
-
-                if capture_message_content:
-                    span.add_event(
-                        name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvention.GEN_AI_CONTENT_PROMPT: str(kwargs.get('input', '')),
-                        },
-                    )
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = create_metrics_attributes(
-                        service_name=application_name,
-                        deployment_environment=environment,
-                        operation=SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
-                        system=SemanticConvention.GEN_AI_SYSTEM_LITELLM,
-                        request_model=request_model,
-                        server_address=server_address,
-                        server_port=server_port,
-                        response_model=response_dict.get('model'),
-                    )
-                    metrics['genai_client_usage_tokens'].record(
-                        input_tokens, attributes
-                    )
-                    metrics['genai_client_operation_duration'].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics['genai_requests'].add(1, attributes)
-                    metrics['genai_prompt_tokens'].add(input_tokens, attributes)
-                    metrics['genai_cost'].record(cost, attributes)
-
-                # Return original response
-                return response
+                response = process_embedding_response(
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    start_time=start_time,
+                    span=span,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                    **kwargs
+                )

             except Exception as e:
                 handle_exception(span, e)
-                logger.error('Error in trace creation: %s', e)

-                # Return original response
-                return response
+            return response

     return wrapper
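
The substance of this release is a refactor: the inline OpenTelemetry span and metric bookkeeping in litellm.py moves into shared helpers in openlit.instrumentation.litellm.utils (process_chunk, process_streaming_chat_response, process_chat_response, process_embedding_response), the module-level logger is dropped in favor of handle_exception alone, and TracedSyncStream gains span_name, tool-call, and token-counter state. The utils module itself is not part of this diff, so the following is only a hypothetical sketch of what process_chunk plausibly does, reconstructed from the inline streaming logic removed above; the field names mirror the TracedSyncStream attributes shown in the diff, and the actual implementation may differ.

import time

# Hypothetical sketch of openlit.instrumentation.litellm.utils.process_chunk,
# reconstructed from the inline logic this release removes. Not the shipped
# implementation, which is not included in this diff.
def process_chunk(scope, chunk):
    """Accumulate streaming telemetry state on the traced-stream wrapper."""
    end_time = time.time()
    # Record the timestamp for the current chunk
    scope._timestamps.append(end_time)
    if len(scope._timestamps) == 1:
        # Time to first chunk, relative to when the wrapped call started
        scope._ttft = scope._timestamps[0] - scope._start_time

    # Normalize the chunk to a plain dict (LiteLLM yields pydantic-style objects)
    chunked = chunk.model_dump() if hasattr(chunk, "model_dump") else dict(chunk)

    choices = chunked.get("choices") or []
    if choices and "content" in (choices[0].get("delta") or {}):
        content = choices[0]["delta"]["content"]
        if content:
            scope._llmresponse += content
    scope._response_id = chunked.get("id")
    scope._response_model = chunked.get("model")
    if choices:
        scope._finish_reason = choices[0].get("finish_reason")
    scope._response_service_tier = str(chunked.get("system_fingerprint"))

Nothing changes on the caller side: the completion and embedding wrappers are still attached to LiteLLM by the package's instrumentor (which is outside this diff), so applications keep calling litellm.completion(...) and litellm.embedding(...) as before, and spans and metrics are emitted transparently.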