openlit 1.34.5__py3-none-any.whl → 1.34.7__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -733,7 +733,7 @@ def async_chat_completions(version, environment, application_name,
733
733
  formatted_messages = []
734
734
  for message in message_prompt:
735
735
  role = message["role"]
736
- content = message["content"]
736
+ content = message.get("content", "")
737
737
 
738
738
  if isinstance(content, list):
739
739
  content_str = ", ".join(
@@ -1,4 +1,3 @@
1
- # pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
2
1
  """Initializer of Auto Instrumentation of Prem AI Functions"""
3
2
 
4
3
  from typing import Collection
@@ -1,72 +1,66 @@
1
1
  """
2
- Module for monitoring Prem AI API calls.
2
+ Module for monitoring PremAI API calls.
3
3
  """
4
4
 
5
- import logging
6
5
  import time
7
- from opentelemetry.trace import SpanKind, Status, StatusCode
8
- from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
6
+ from opentelemetry.trace import SpanKind
9
7
  from openlit.__helpers import (
10
- get_chat_model_cost,
11
- get_embed_model_cost,
12
- general_tokens,
13
8
  handle_exception,
14
- calculate_ttft,
15
- calculate_tbt,
16
- create_metrics_attributes,
17
9
  set_server_address_and_port
18
10
  )
11
+ from openlit.instrumentation.premai.utils import (
12
+ process_chat_response,
13
+ process_chunk,
14
+ process_streaming_chat_response,
15
+ process_embedding_response
16
+ )
19
17
  from openlit.semcov import SemanticConvention
20
18
 
21
- # Initialize logger for logging potential issues and operations
22
- logger = logging.getLogger(__name__)
23
-
24
19
  def chat(version, environment, application_name,
25
- tracer, pricing_info, capture_message_content, metrics, disable_metrics):
20
+ tracer, pricing_info, capture_message_content, metrics, disable_metrics):
26
21
  """
27
- Generates a telemetry wrapper for chat completions to collect metrics.
28
-
29
- Args:
30
- version: Version of the monitoring package.
31
- environment: Deployment environment (e.g., production, staging).
32
- application_name: Name of the application using the PremAI API.
33
- tracer: OpenTelemetry tracer for creating spans.
34
- pricing_info: Information used for calculating the cost of PremAI usage.
35
- capture_message_content: Flag indicating whether to trace the actual content.
36
-
37
- Returns:
38
- A function that wraps the chat completions method to add telemetry.
22
+ Generates a telemetry wrapper for GenAI function call
39
23
  """
40
24
 
41
25
  class TracedSyncStream:
42
26
  """
43
- Wrapper for streaming responses to collect metrics and trace data.
44
- Wraps the response to collect message IDs and aggregated response.
27
+ Wrapper for streaming responses to collect telemetry.
45
28
  """
46
29
 
47
- def __init__(self, wrapped, span, kwargs, server_address, server_port,**args):
30
+ def __init__(
31
+ self,
32
+ wrapped,
33
+ span,
34
+ span_name,
35
+ kwargs,
36
+ server_address,
37
+ server_port,
38
+ **args,
39
+ ):
48
40
  self.__wrapped__ = wrapped
49
41
  self._span = span
42
+ self._span_name = span_name
50
43
  self._llmresponse = ""
51
44
  self._response_id = ""
45
+ self._response_model = ""
46
+ self._input_tokens = 0
47
+ self._output_tokens = 0
48
+ self._finish_reason = ""
49
+ self._tools = None
52
50
  self._args = args
53
51
  self._kwargs = kwargs
54
- self._server_address = server_address
55
- self._server_port = server_port
56
52
  self._start_time = time.time()
57
53
  self._end_time = None
58
54
  self._timestamps = []
59
55
  self._ttft = 0
60
56
  self._tbt = 0
61
- self._response_model = ''
62
- self._finish_reason = ''
57
+ self._server_address = server_address
58
+ self._server_port = server_port
63
59
 
64
60
  def __enter__(self):
65
- # Using context management protocols (if needed)
66
61
  return self
67
62
 
68
63
  def __exit__(self, exc_type, exc_value, traceback):
69
- # Add any resource cleanup or finalization if required.
70
64
  pass
71
65
 
72
66
  def __getattr__(self, name):
@@ -75,391 +69,86 @@ def chat(version, environment, application_name,
75
69
 
76
70
  def __iter__(self):
77
71
  try:
78
- end_time = time.time()
79
- # Record the timestamp for the current chunk
80
- self._timestamps.append(end_time)
81
-
82
- if len(self._timestamps) == 1:
83
- # Calculate time to first chunk
84
- self._ttft = calculate_ttft(self._timestamps, self._start_time)
85
-
86
- for chunk in self.__wrapped__:
87
- # Assuming `chunk` has similar structure as 'ChatCompletionResponseStream'
88
- if chunk.choices:
89
- first_choice = chunk.choices[0]
90
-
91
- if first_choice.delta.get('content'):
92
- self._llmresponse += first_choice.delta.get('content')
93
-
94
- if chunk.choices[0].finish_reason:
95
- self._finish_reason = chunk.choices[0].finish_reason
96
- self._response_id = chunk.id
97
- self._response_model = chunk.model
98
-
99
- if not chunk:
100
- # pylint: disable= stop-iteration-return
101
- raise StopIteration
102
- yield chunk
72
+ chunk = self.__wrapped__.__next__()
73
+ process_chunk(self, chunk)
74
+ return chunk
103
75
 
104
76
  finally:
105
- # Handling exception ensure observability without disrupting operation
106
77
  try:
107
- self._end_time = time.time()
108
- if len(self._timestamps) > 1:
109
- self._tbt = calculate_tbt(self._timestamps)
110
-
111
- # Format 'messages' into a single string
112
- message_prompt = self._kwargs.get("messages", "")
113
- formatted_messages = []
114
- for message in message_prompt:
115
- role = message["role"]
116
- content = message["content"]
117
-
118
- if isinstance(content, list):
119
- content_str = ", ".join(
120
- f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
121
- if "type" in item else f'text: {item["text"]}'
122
- for item in content
123
- )
124
- formatted_messages.append(f"{role}: {content_str}")
125
- else:
126
- formatted_messages.append(f"{role}: {content}")
127
- prompt = "\n".join(formatted_messages)
128
-
129
- request_model = self._kwargs.get("model", "gpt-4o-mini")
130
-
131
- # Calculate tokens using input prompt and aggregated response
132
- input_tokens = general_tokens(prompt)
133
- output_tokens = general_tokens(self._llmresponse)
134
-
135
- # Calculate cost of the operation
136
- cost = get_chat_model_cost(request_model,
137
- pricing_info, input_tokens,
138
- output_tokens)
139
-
140
- # Set Span attributes (OTel Semconv)
141
- self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
142
- self._span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
143
- SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
144
- self._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
145
- SemanticConvention.GEN_AI_SYSTEM_PREMAI)
146
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
147
- request_model)
148
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED,
149
- self._kwargs.get("seed", ""))
150
- self._span.set_attribute(SemanticConvention.SERVER_PORT,
151
- self._server_port)
152
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
153
- self._kwargs.get("frequency_penalty", 0.0))
154
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
155
- self._kwargs.get("max_tokens", -1))
156
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
157
- self._kwargs.get("presence_penalty", 0.0))
158
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
159
- self._kwargs.get("stop", []))
160
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
161
- self._kwargs.get("temperature", 1.0))
162
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
163
- self._kwargs.get("top_p", 1.0))
164
- self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
165
- [self._finish_reason])
166
- self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
167
- self._response_id)
168
- self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
169
- self._response_model)
170
- self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
171
- input_tokens)
172
- self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
173
- output_tokens)
174
- self._span.set_attribute(SemanticConvention.SERVER_ADDRESS,
175
- self._server_address)
176
- if isinstance(self._llmresponse, str):
177
- self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
178
- "text")
179
- else:
180
- self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
181
- "json")
182
-
183
- # Set Span attributes (Extra)
184
- self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
185
- environment)
186
- self._span.set_attribute(SERVICE_NAME,
187
- application_name)
188
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
189
- self._kwargs.get("user", ""))
190
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
191
- True)
192
- self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
193
- input_tokens + output_tokens)
194
- self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
195
- cost)
196
- self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
197
- self._tbt)
198
- self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
199
- self._ttft)
200
- self._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
201
- version)
202
- if capture_message_content:
203
- self._span.add_event(
204
- name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
205
- attributes={
206
- SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
207
- },
208
- )
209
- self._span.add_event(
210
- name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
211
- attributes={
212
- SemanticConvention.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
213
- },
214
- )
215
- self._span.set_status(Status(StatusCode.OK))
216
-
217
- if disable_metrics is False:
218
- attributes = create_metrics_attributes(
219
- service_name=application_name,
220
- deployment_environment=environment,
221
- operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
222
- system=SemanticConvention.GEN_AI_SYSTEM_PREMAI,
223
- request_model=request_model,
224
- server_address=self._server_address,
225
- server_port=self._server_port,
226
- response_model=self._response_model,
227
- )
228
-
229
- metrics["genai_client_usage_tokens"].record(
230
- input_tokens + output_tokens, attributes
231
- )
232
- metrics["genai_client_operation_duration"].record(
233
- self._end_time - self._start_time, attributes
234
- )
235
- metrics["genai_server_tbt"].record(
236
- self._tbt, attributes
237
- )
238
- metrics["genai_server_ttft"].record(
239
- self._ttft, attributes
78
+ with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
79
+ process_streaming_chat_response(
80
+ self,
81
+ pricing_info=pricing_info,
82
+ environment=environment,
83
+ application_name=application_name,
84
+ metrics=metrics,
85
+ capture_message_content=capture_message_content,
86
+ disable_metrics=disable_metrics,
87
+ version=version
240
88
  )
241
- metrics["genai_requests"].add(1, attributes)
242
- metrics["genai_completion_tokens"].add(output_tokens, attributes)
243
- metrics["genai_prompt_tokens"].add(input_tokens, attributes)
244
- metrics["genai_cost"].record(cost, attributes)
245
89
 
246
90
  except Exception as e:
247
91
  handle_exception(self._span, e)
248
- logger.error("Error in trace creation: %s", e)
249
- finally:
250
- self._span.end()
251
92
 
252
93
  def wrapper(wrapped, instance, args, kwargs):
253
94
  """
254
- Wraps the 'chat.completions' API call to add telemetry.
255
-
256
- This collects metrics such as execution time, cost, and token usage, and handles errors
257
- gracefully, adding details to the trace for observability.
258
-
259
- Args:
260
- wrapped: The original 'chat.completions' method to be wrapped.
261
- instance: The instance of the class where the original method is defined.
262
- args: Positional arguments for the 'chat.completions' method.
263
- kwargs: Keyword arguments for the 'chat.completions' method.
264
-
265
- Returns:
266
- The response from the original 'chat.completions' method.
95
+ Wraps the GenAI function call.
267
96
  """
268
97
 
269
98
  # Check if streaming is enabled for the API call
270
99
  streaming = kwargs.get("stream", False)
100
+
271
101
  server_address, server_port = set_server_address_and_port(instance, "app.premai.io", 443)
272
102
  request_model = kwargs.get("model", "gpt-4o-mini")
273
103
 
274
104
  span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
275
105
 
276
- # pylint: disable=no-else-return
277
106
  if streaming:
278
107
  # Special handling for streaming response to accommodate the nature of data flow
279
108
  awaited_wrapped = wrapped(*args, **kwargs)
280
109
  span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
281
-
282
- return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
110
+ return TracedSyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
283
111
 
284
112
  # Handling for non-streaming responses
285
113
  else:
286
- with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
114
+ with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
287
115
  start_time = time.time()
288
116
  response = wrapped(*args, **kwargs)
289
- end_time = time.time()
290
117
 
291
118
  try:
292
- # Format 'messages' into a single string
293
- message_prompt = kwargs.get("messages", "")
294
- formatted_messages = []
295
- for message in message_prompt:
296
- role = message["role"]
297
- content = message["content"]
298
-
299
- if isinstance(content, list):
300
- content_str = ", ".join(
301
- f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
302
- if "type" in item else f'text: {item["text"]}'
303
- for item in content
304
- )
305
- formatted_messages.append(f"{role}: {content_str}")
306
- else:
307
- formatted_messages.append(f"{role}: {content}")
308
- prompt = "\n".join(formatted_messages)
309
-
310
- input_tokens = response.usage.prompt_tokens
311
- output_tokens = response.usage.completion_tokens
312
-
313
- # Calculate cost of the operation
314
- cost = get_chat_model_cost(request_model,
315
- pricing_info, input_tokens,
316
- output_tokens)
317
-
318
- # Set base span attribues (OTel Semconv)
319
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
320
- span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
321
- SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
322
- span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
323
- SemanticConvention.GEN_AI_SYSTEM_PREMAI)
324
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
325
- request_model)
326
- span.set_attribute(SemanticConvention.SERVER_PORT,
327
- server_port)
328
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
329
- kwargs.get("frequency_penalty", 0.0))
330
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
331
- kwargs.get("max_tokens", -1))
332
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
333
- kwargs.get("presence_penalty", 0.0))
334
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
335
- kwargs.get("stop", []))
336
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
337
- kwargs.get("temperature", 1.0))
338
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
339
- kwargs.get("top_p", 1.0))
340
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
341
- response.additional_properties.get('id'))
342
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
343
- response.model)
344
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
345
- input_tokens)
346
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
347
- output_tokens)
348
- span.set_attribute(SemanticConvention.SERVER_ADDRESS,
349
- server_address)
350
-
351
- # Set base span attribues (Extras)
352
- span.set_attribute(DEPLOYMENT_ENVIRONMENT,
353
- environment)
354
- span.set_attribute(SERVICE_NAME,
355
- application_name)
356
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
357
- False)
358
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
359
- input_tokens + output_tokens)
360
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
361
- cost)
362
- span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
363
- end_time - start_time)
364
- span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
365
- version)
366
- if capture_message_content:
367
- span.add_event(
368
- name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
369
- attributes={
370
- SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
371
- },
372
- )
373
- span.add_event(
374
- name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
375
- attributes={
376
- SemanticConvention.GEN_AI_CONTENT_COMPLETION: str(response.choices[0].message.content),
377
- },
378
- )
379
-
380
- if kwargs.get('tools'):
381
- span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALLS,
382
- str(response.choices[0].message.tool_calls))
383
-
384
- if kwargs.get('response_format', '') != '':
385
- span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
386
- "json")
387
- else:
388
- span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
389
- "text")
390
-
391
- span.set_status(Status(StatusCode.OK))
392
-
393
- if disable_metrics is False:
394
- attributes = create_metrics_attributes(
395
- service_name=application_name,
396
- deployment_environment=environment,
397
- operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
398
- system=SemanticConvention.GEN_AI_SYSTEM_PREMAI,
399
- request_model=request_model,
400
- server_address=server_address,
401
- server_port=server_port,
402
- response_model=response.model,
403
- )
404
-
405
- metrics["genai_client_usage_tokens"].record(
406
- input_tokens + output_tokens, attributes
407
- )
408
- metrics["genai_client_operation_duration"].record(
409
- end_time - start_time, attributes
410
- )
411
- metrics["genai_server_ttft"].record(
412
- end_time - start_time, attributes
413
- )
414
- metrics["genai_requests"].add(1, attributes)
415
- metrics["genai_completion_tokens"].add(output_tokens, attributes)
416
- metrics["genai_prompt_tokens"].add(input_tokens, attributes)
417
- metrics["genai_cost"].record(cost, attributes)
418
-
419
- # Return original response
420
- return response
119
+ response = process_chat_response(
120
+ response=response,
121
+ request_model=request_model,
122
+ pricing_info=pricing_info,
123
+ server_port=server_port,
124
+ server_address=server_address,
125
+ environment=environment,
126
+ application_name=application_name,
127
+ metrics=metrics,
128
+ start_time=start_time,
129
+ span=span,
130
+ capture_message_content=capture_message_content,
131
+ disable_metrics=disable_metrics,
132
+ version=version,
133
+ **kwargs
134
+ )
421
135
 
422
136
  except Exception as e:
423
137
  handle_exception(span, e)
424
- logger.error("Error in trace creation: %s", e)
425
138
 
426
- # Return original response
427
- return response
139
+ return response
428
140
 
429
141
  return wrapper
430
142
 
431
143
  def embedding(version, environment, application_name,
432
- tracer, pricing_info, capture_message_content, metrics, disable_metrics):
144
+ tracer, pricing_info, capture_message_content, metrics, disable_metrics):
433
145
  """
434
- Generates a telemetry wrapper for embeddings to collect metrics.
435
-
436
- Args:
437
- version: Version of the monitoring package.
438
- environment: Deployment environment (e.g., production, staging).
439
- application_name: Name of the application using the PremAI API.
440
- tracer: OpenTelemetry tracer for creating spans.
441
- pricing_info: Information used for calculating the cost of PremAI usage.
442
- capture_message_content: Flag indicating whether to trace the actual content.
443
-
444
- Returns:
445
- A function that wraps the embeddings method to add telemetry.
146
+ Generates a telemetry wrapper for GenAI function call
446
147
  """
447
148
 
448
149
  def wrapper(wrapped, instance, args, kwargs):
449
150
  """
450
- Wraps the 'embeddings' API call to add telemetry.
451
-
452
- This collects metrics such as execution time, cost, and token usage, and handles errors
453
- gracefully, adding details to the trace for observability.
454
-
455
- Args:
456
- wrapped: The original 'embeddings' method to be wrapped.
457
- instance: The instance of the class where the original method is defined.
458
- args: Positional arguments for the 'embeddings' method.
459
- kwargs: Keyword arguments for the 'embeddings' method.
460
-
461
- Returns:
462
- The response from the original 'embeddings' method.
151
+ Wraps the GenAI function call.
463
152
  """
464
153
 
465
154
  server_address, server_port = set_server_address_and_port(instance, "app.premai.io", 443)
@@ -467,90 +156,31 @@ def embedding(version, environment, application_name,
467
156
 
468
157
  span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
469
158
 
470
- with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
159
+ with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
471
160
  start_time = time.time()
472
161
  response = wrapped(*args, **kwargs)
473
- end_time = time.time()
474
162
 
475
163
  try:
476
- input_tokens = response.usage.prompt_tokens
477
-
478
- # Calculate cost of the operation
479
- cost = get_embed_model_cost(request_model,
480
- pricing_info, input_tokens)
481
-
482
- # Set Span attributes (OTel Semconv)
483
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
484
- span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
485
- SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING)
486
- span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
487
- SemanticConvention.GEN_AI_SYSTEM_PREMAI)
488
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
489
- request_model)
490
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_ENCODING_FORMATS,
491
- [kwargs.get('encoding_format', 'float')])
492
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
493
- response.model)
494
- span.set_attribute(SemanticConvention.SERVER_ADDRESS,
495
- server_address)
496
- span.set_attribute(SemanticConvention.SERVER_PORT,
497
- server_port)
498
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
499
- input_tokens)
500
-
501
- # Set Span attributes (Extras)
502
- span.set_attribute(DEPLOYMENT_ENVIRONMENT,
503
- environment)
504
- span.set_attribute(SERVICE_NAME,
505
- application_name)
506
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
507
- kwargs.get("user", ""))
508
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
509
- input_tokens)
510
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
511
- cost)
512
- span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
513
- version)
514
-
515
- if capture_message_content:
516
- span.add_event(
517
- name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
518
- attributes={
519
- SemanticConvention.GEN_AI_CONTENT_PROMPT: str(kwargs.get("input", "")),
520
- },
521
- )
522
-
523
- span.set_status(Status(StatusCode.OK))
524
-
525
- if disable_metrics is False:
526
- attributes = create_metrics_attributes(
527
- service_name=application_name,
528
- deployment_environment=environment,
529
- operation=SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
530
- system=SemanticConvention.GEN_AI_SYSTEM_PREMAI,
531
- request_model=request_model,
532
- server_address=server_address,
533
- server_port=server_port,
534
- response_model=response.model,
535
- )
536
- metrics["genai_client_usage_tokens"].record(
537
- input_tokens, attributes
538
- )
539
- metrics["genai_client_operation_duration"].record(
540
- end_time - start_time, attributes
541
- )
542
- metrics["genai_requests"].add(1, attributes)
543
- metrics["genai_prompt_tokens"].add(input_tokens, attributes)
544
- metrics["genai_cost"].record(cost, attributes)
545
-
546
- # Return original response
547
- return response
164
+ response = process_embedding_response(
165
+ response=response,
166
+ request_model=request_model,
167
+ pricing_info=pricing_info,
168
+ server_port=server_port,
169
+ server_address=server_address,
170
+ environment=environment,
171
+ application_name=application_name,
172
+ metrics=metrics,
173
+ start_time=start_time,
174
+ span=span,
175
+ capture_message_content=capture_message_content,
176
+ disable_metrics=disable_metrics,
177
+ version=version,
178
+ **kwargs
179
+ )
548
180
 
549
181
  except Exception as e:
550
182
  handle_exception(span, e)
551
- logger.error("Error in trace creation: %s", e)
552
183
 
553
- # Return original response
554
- return response
184
+ return response
555
185
 
556
186
  return wrapper