openlit 1.34.19__py3-none-any.whl → 1.34.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,78 +2,55 @@
2
2
  Module for monitoring OpenAI API calls.
3
3
  """
4
4
 
5
- import logging
6
5
  import time
7
- from opentelemetry.trace import SpanKind, Status, StatusCode
8
- from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
6
+ from opentelemetry.trace import SpanKind
9
7
  from openlit.__helpers import (
10
- get_chat_model_cost,
11
- get_embed_model_cost,
12
- get_audio_model_cost,
13
- get_image_model_cost,
14
- general_tokens,
15
8
  handle_exception,
16
- extract_and_format_input,
17
- concatenate_all_contents,
18
- response_as_dict,
19
- calculate_ttft,
20
- calculate_tbt,
21
- create_metrics_attributes,
22
9
  set_server_address_and_port
23
10
  )
11
+ from openlit.instrumentation.openai.utils import (
12
+ process_chat_chunk,
13
+ process_response_chunk,
14
+ process_chat_response,
15
+ process_streaming_chat_response,
16
+ process_streaming_response_response,
17
+ process_response_response,
18
+ process_embedding_response,
19
+ process_image_response,
20
+ process_audio_response,
21
+ )
24
22
  from openlit.semcov import SemanticConvention
25
23
 
26
- # Initialize logger for logging potential issues and operations
27
- logger = logging.getLogger(__name__)
28
-
29
- def responses(version, environment, application_name,
30
- tracer, pricing_info, capture_message_content, metrics, disable_metrics):
24
+ def chat_completions(version, environment, application_name, tracer, pricing_info,
25
+ capture_message_content, metrics, disable_metrics):
31
26
  """
32
- Generates a telemetry wrapper for chat completions to collect metrics.
33
-
34
- Args:
35
- version: Version of the monitoring package.
36
- environment: Deployment environment (e.g., production, staging).
37
- application_name: Name of the application using the OpenAI API.
38
- tracer: OpenTelemetry tracer for creating spans.
39
- pricing_info: Information used for calculating the cost of OpenAI usage.
40
- capture_message_content: Flag indicating whether to trace the actual content.
41
-
42
- Returns:
43
- A function that wraps the chat completions method to add telemetry.
27
+ Generates a telemetry wrapper for OpenAI chat completions.
44
28
  """
45
29
 
46
30
  class TracedSyncStream:
47
31
  """
48
- Wrapper for streaming responses to collect metrics and trace data.
49
- Wraps the response to collect message IDs and aggregated response.
50
-
51
- This class implements the '__aiter__' and '__anext__' methods that
52
- handle asynchronous streaming responses.
53
-
54
- This class also implements '__aenter__' and '__aexit__' methods that
55
- handle asynchronous context management protocol.
32
+ Wrapper for streaming responses to collect telemetry.
56
33
  """
34
+
57
35
  def __init__(
58
36
  self,
59
37
  wrapped,
60
38
  span,
39
+ span_name,
61
40
  kwargs,
62
41
  server_address,
63
42
  server_port,
64
- **args,
65
43
  ):
66
44
  self.__wrapped__ = wrapped
67
45
  self._span = span
68
- # Placeholder for aggregating streaming response
46
+ self._span_name = span_name
69
47
  self._llmresponse = ""
70
48
  self._response_id = ""
71
49
  self._response_model = ""
72
50
  self._finish_reason = ""
73
- self._input_tokens = ""
74
- self._output_tokens = ""
75
-
76
- self._args = args
51
+ self._system_fingerprint = ""
52
+ self._service_tier = "auto"
53
+ self._tools = None
77
54
  self._kwargs = kwargs
78
55
  self._start_time = time.time()
79
56
  self._end_time = None
@@ -100,383 +77,106 @@ def responses(version, environment, application_name,
100
77
  def __next__(self):
101
78
  try:
102
79
  chunk = self.__wrapped__.__next__()
103
- end_time = time.time()
104
- # Record the timestamp for the current chunk
105
- self._timestamps.append(end_time)
106
-
107
- if len(self._timestamps) == 1:
108
- # Calculate time to first chunk
109
- self._ttft = calculate_ttft(self._timestamps, self._start_time)
110
-
111
- chunked = response_as_dict(chunk)
112
- # Collect message IDs and aggregated response from events
113
- if chunked.get('type') == "response.output_text.delta":
114
- self._llmresponse += chunked.get('delta')
115
- if chunked.get('type') == "response.completed":
116
- self._response_id = chunked.get('response').get('id')
117
- self._response_model = chunked.get('response').get('model')
118
- self._finish_reason = chunked.get('response').get('status')
119
- self._input_tokens = chunked.get('response').get('usage').get('input_tokens')
120
- self._output_tokens = chunked.get('response').get('usage').get('output_tokens')
80
+ process_chat_chunk(self, chunk)
121
81
  return chunk
122
82
  except StopIteration:
123
- # Handling exception ensure observability without disrupting operation
124
83
  try:
125
- self._end_time = time.time()
126
- if len(self._timestamps) > 1:
127
- self._tbt = calculate_tbt(self._timestamps)
128
-
129
- try:
130
- formatted_messages = extract_and_format_input(self._kwargs.get('input', ''))
131
- prompt = concatenate_all_contents(formatted_messages)
132
- except:
133
- prompt = self._kwargs.get('input', '')
134
-
135
- request_model = self._kwargs.get("model", "gpt-4o")
136
-
137
- # Calculate cost of the operation
138
- cost = get_chat_model_cost(request_model,
139
- pricing_info, self._input_tokens,
140
- self._output_tokens)
141
-
142
- # Set Span attributes (OTel Semconv)
143
- self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
144
- self._span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
145
- SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
146
- self._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
147
- SemanticConvention.GEN_AI_SYSTEM_OPENAI)
148
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
149
- request_model)
150
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED,
151
- str(self._kwargs.get("seed", "")))
152
- self._span.set_attribute(SemanticConvention.SERVER_PORT,
153
- self._server_port)
154
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
155
- str(self._kwargs.get("max_output_tokens", -1)))
156
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
157
- str(self._kwargs.get("stop", [])))
158
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
159
- str(self._kwargs.get("temperature", 1.0)))
160
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
161
- str(self._kwargs.get("top_p", 1.0)))
162
- self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
163
- [self._finish_reason])
164
- self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
165
- self._response_id)
166
- self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
167
- self._response_model)
168
- self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
169
- self._input_tokens)
170
- self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
171
- self._output_tokens)
172
- self._span.set_attribute(SemanticConvention.SERVER_ADDRESS,
173
- self._server_address)
174
- if isinstance(self._llmresponse, str):
175
- self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
176
- "text")
177
- else:
178
- self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
179
- "json")
180
-
181
- # Set Span attributes (Extra)
182
- self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
183
- environment)
184
- self._span.set_attribute(SERVICE_NAME,
185
- application_name)
186
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
187
- self._kwargs.get("user", ""))
188
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
189
- True)
190
- self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
191
- self._input_tokens + self._output_tokens)
192
- self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
193
- cost)
194
- self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
195
- self._tbt)
196
- self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
197
- self._ttft)
198
- self._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
199
- version)
200
-
201
- if capture_message_content:
202
- self._span.add_event(
203
- name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
204
- attributes={
205
- SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
206
- },
207
- )
208
- self._span.add_event(
209
- name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
210
- attributes={
211
- SemanticConvention.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
212
- },
213
- )
214
- self._span.set_status(Status(StatusCode.OK))
215
-
216
- if disable_metrics is False:
217
- attributes = create_metrics_attributes(
218
- service_name=application_name,
219
- deployment_environment=environment,
220
- operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
221
- system=SemanticConvention.GEN_AI_SYSTEM_OPENAI,
222
- request_model=request_model,
223
- server_address=self._server_address,
224
- server_port=self._server_port,
225
- response_model=self._response_model,
84
+ with self._span:
85
+ process_streaming_chat_response(
86
+ self,
87
+ pricing_info=pricing_info,
88
+ environment=environment,
89
+ application_name=application_name,
90
+ metrics=metrics,
91
+ capture_message_content=capture_message_content,
92
+ disable_metrics=disable_metrics,
93
+ version=version
226
94
  )
227
-
228
- metrics["genai_client_usage_tokens"].record(
229
- self._input_tokens + self._output_tokens, attributes
230
- )
231
- metrics["genai_client_operation_duration"].record(
232
- self._end_time - self._start_time, attributes
233
- )
234
- metrics["genai_server_tbt"].record(
235
- self._tbt, attributes
236
- )
237
- metrics["genai_server_ttft"].record(
238
- self._ttft, attributes
239
- )
240
- metrics["genai_requests"].add(1, attributes)
241
- metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
242
- metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
243
- metrics["genai_cost"].record(cost, attributes)
244
-
245
95
  except Exception as e:
246
96
  handle_exception(self._span, e)
247
- logger.error("Error in trace creation: %s", e)
248
- finally:
249
- self._span.end()
250
97
  raise
251
98
 
252
99
  def wrapper(wrapped, instance, args, kwargs):
253
100
  """
254
- Wraps the 'chat.completions' API call to add telemetry.
255
-
256
- This collects metrics such as execution time, cost, and token usage, and handles errors
257
- gracefully, adding details to the trace for observability.
258
-
259
- Args:
260
- wrapped: The original 'chat.completions' method to be wrapped.
261
- instance: The instance of the class where the original method is defined.
262
- args: Positional arguments for the 'chat.completions' method.
263
- kwargs: Keyword arguments for the 'chat.completions' method.
264
-
265
- Returns:
266
- The response from the original 'chat.completions' method.
101
+ Wraps the OpenAI chat completions call.
267
102
  """
268
103
 
269
- # Check if streaming is enabled for the API call
270
104
  streaming = kwargs.get("stream", False)
271
105
  server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
272
106
  request_model = kwargs.get("model", "gpt-4o")
273
107
 
274
108
  span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
275
109
 
276
- # pylint: disable=no-else-return
277
110
  if streaming:
278
- # Special handling for streaming response to accommodate the nature of data flow
279
111
  awaited_wrapped = wrapped(*args, **kwargs)
280
112
  span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
281
113
 
282
- return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
114
+ return TracedSyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
283
115
 
284
- # Handling for non-streaming responses
285
116
  else:
286
- with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
117
+ with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
287
118
  start_time = time.time()
288
119
  response = wrapped(*args, **kwargs)
289
- end_time = time.time()
290
-
291
- response_dict = response_as_dict(response)
292
120
 
293
121
  try:
294
- try:
295
- formatted_messages = extract_and_format_input(kwargs.get('input', ''))
296
- prompt = concatenate_all_contents(formatted_messages)
297
- except:
298
- prompt = kwargs.get('input', '')
299
-
300
- input_tokens = response_dict.get('usage').get('input_tokens')
301
- output_tokens = response_dict.get('usage').get('output_tokens')
302
-
303
- # Calculate cost of the operation
304
- cost = get_chat_model_cost(request_model,
305
- pricing_info, input_tokens,
306
- output_tokens)
307
-
308
- # Set base span attribues (OTel Semconv)
309
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
310
- span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
311
- SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
312
- span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
313
- SemanticConvention.GEN_AI_SYSTEM_OPENAI)
314
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
315
- request_model)
316
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED,
317
- kwargs.get("seed", ""))
318
- span.set_attribute(SemanticConvention.SERVER_PORT,
319
- server_port)
320
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
321
- kwargs.get("max_output_tokens", -1))
322
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
323
- kwargs.get("stop", []))
324
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
325
- str(response_dict.get("temperature", 1.0)))
326
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
327
- str(response_dict.get("top_p", 1.0)))
328
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
329
- response_dict.get("id"))
330
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
331
- response_dict.get('model'))
332
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
333
- input_tokens)
334
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
335
- output_tokens)
336
- span.set_attribute(SemanticConvention.SERVER_ADDRESS,
337
- server_address)
338
-
339
- # Set base span attribues (Extras)
340
- span.set_attribute(DEPLOYMENT_ENVIRONMENT,
341
- environment)
342
- span.set_attribute(SERVICE_NAME,
343
- application_name)
344
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
345
- kwargs.get("user", ""))
346
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
347
- False)
348
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
349
- input_tokens + output_tokens)
350
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
351
- cost)
352
- span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
353
- end_time - start_time)
354
- span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
355
- version)
356
-
357
- if capture_message_content:
358
- span.add_event(
359
- name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
360
- attributes={
361
- SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
362
- },
363
- )
364
-
365
- for i in range(kwargs.get('n',1)):
366
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
367
- [response_dict.get('status')])
368
- try:
369
- llm_response = response_dict.get('output')[i].get('content')[0].get('text','')
370
- except:
371
- llm_response = ''
372
-
373
- if capture_message_content:
374
- span.add_event(
375
- name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
376
- attributes={
377
- # pylint: disable=line-too-long
378
- SemanticConvention.GEN_AI_CONTENT_COMPLETION: llm_response,
379
- },
380
- )
381
- if kwargs.get('tools'):
382
- span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALLS,
383
- str(response_dict.get('tools')))
384
-
385
- if isinstance(llm_response, str):
386
- span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
387
- "text")
388
- elif llm_response is not None:
389
- span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
390
- "json")
391
-
392
- span.set_status(Status(StatusCode.OK))
393
-
394
- if disable_metrics is False:
395
- attributes = create_metrics_attributes(
396
- service_name=application_name,
397
- deployment_environment=environment,
398
- operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
399
- system=SemanticConvention.GEN_AI_SYSTEM_OPENAI,
122
+ response = process_chat_response(
123
+ response=response,
400
124
  request_model=request_model,
401
- server_address=server_address,
125
+ pricing_info=pricing_info,
402
126
  server_port=server_port,
403
- response_model=response_dict.get('model'),
404
- )
405
-
406
- metrics["genai_client_usage_tokens"].record(
407
- input_tokens + output_tokens, attributes
408
- )
409
- metrics["genai_client_operation_duration"].record(
410
- end_time - start_time, attributes
411
- )
412
- metrics["genai_server_ttft"].record(
413
- end_time - start_time, attributes
414
- )
415
- metrics["genai_requests"].add(1, attributes)
416
- metrics["genai_completion_tokens"].add(output_tokens, attributes)
417
- metrics["genai_prompt_tokens"].add(input_tokens, attributes)
418
- metrics["genai_cost"].record(cost, attributes)
419
-
420
- # Return original response
421
- return response
127
+ server_address=server_address,
128
+ environment=environment,
129
+ application_name=application_name,
130
+ metrics=metrics,
131
+ start_time=start_time,
132
+ span=span,
133
+ capture_message_content=capture_message_content,
134
+ disable_metrics=disable_metrics,
135
+ version=version,
136
+ **kwargs
137
+ )
422
138
 
423
139
  except Exception as e:
424
140
  handle_exception(span, e)
425
- logger.error("Error in trace creation: %s", e)
426
141
 
427
- # Return original response
428
- return response
142
+ return response
429
143
 
430
144
  return wrapper
431
145
 
432
- def chat_completions(version, environment, application_name,
433
- tracer, pricing_info, capture_message_content, metrics, disable_metrics):
146
+ def responses(version, environment, application_name, tracer, pricing_info,
147
+ capture_message_content, metrics, disable_metrics, **kwargs):
434
148
  """
435
- Generates a telemetry wrapper for chat completions to collect metrics.
436
-
437
- Args:
438
- version: Version of the monitoring package.
439
- environment: Deployment environment (e.g., production, staging).
440
- application_name: Name of the application using the OpenAI API.
441
- tracer: OpenTelemetry tracer for creating spans.
442
- pricing_info: Information used for calculating the cost of OpenAI usage.
443
- capture_message_content: Flag indicating whether to trace the actual content.
444
-
445
- Returns:
446
- A function that wraps the chat completions method to add telemetry.
149
+ Generates a telemetry wrapper for OpenAI responses API.
447
150
  """
448
151
 
449
152
  class TracedSyncStream:
450
153
  """
451
- Wrapper for streaming responses to collect metrics and trace data.
452
- Wraps the response to collect message IDs and aggregated response.
453
-
454
- This class implements the '__aiter__' and '__anext__' methods that
455
- handle asynchronous streaming responses.
456
-
457
- This class also implements '__aenter__' and '__aexit__' methods that
458
- handle asynchronous context management protocol.
154
+ Wrapper for streaming responses to collect telemetry.
459
155
  """
156
+
460
157
  def __init__(
461
158
  self,
462
159
  wrapped,
463
160
  span,
161
+ span_name,
464
162
  kwargs,
465
163
  server_address,
466
164
  server_port,
467
- **args,
468
165
  ):
469
166
  self.__wrapped__ = wrapped
470
167
  self._span = span
471
- # Placeholder for aggregating streaming response
168
+ self._span_name = span_name
472
169
  self._llmresponse = ""
473
170
  self._response_id = ""
474
171
  self._response_model = ""
475
172
  self._finish_reason = ""
476
- self._openai_response_service_tier = ""
477
- self._openai_system_fingerprint = ""
478
-
479
- self._args = args
173
+ self._input_tokens = 0
174
+ self._output_tokens = 0
175
+ self._reasoning_tokens = 0
176
+ self._operation_type = "responses"
177
+ self._service_tier = "default"
178
+ self._tools = None
179
+ self._response_tools = None
480
180
  self._kwargs = kwargs
481
181
  self._start_time = time.time()
482
182
  self._end_time = None
@@ -503,578 +203,126 @@ def chat_completions(version, environment, application_name,
503
203
  def __next__(self):
504
204
  try:
505
205
  chunk = self.__wrapped__.__next__()
506
- end_time = time.time()
507
- # Record the timestamp for the current chunk
508
- self._timestamps.append(end_time)
509
-
510
- if len(self._timestamps) == 1:
511
- # Calculate time to first chunk
512
- self._ttft = calculate_ttft(self._timestamps, self._start_time)
513
-
514
- chunked = response_as_dict(chunk)
515
- # Collect message IDs and aggregated response from events
516
- if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
517
- 'content' in chunked.get('choices')[0].get('delta'))):
518
-
519
- content = chunked.get('choices')[0].get('delta').get('content')
520
- if content:
521
- self._llmresponse += content
522
- self._response_id = chunked.get('id')
523
- self._response_model = chunked.get('model')
524
- try:
525
- self._finish_reason = chunked.get('choices', [])[0].get('finish_reason')
526
- except (IndexError, AttributeError, TypeError):
527
- self._finish_reason = "stop"
528
- self._openai_response_service_tier = chunked.get('service_tier') or 'auto'
529
- self._openai_system_fingerprint = chunked.get('system_fingerprint')
206
+ process_response_chunk(self, chunk)
530
207
  return chunk
531
208
  except StopIteration:
532
- # Handling exception ensure observability without disrupting operation
533
209
  try:
534
- self._end_time = time.time()
535
- if len(self._timestamps) > 1:
536
- self._tbt = calculate_tbt(self._timestamps)
537
-
538
- # Format 'messages' into a single string
539
- message_prompt = self._kwargs.get("messages", "")
540
- formatted_messages = []
541
- for message in message_prompt:
542
- role = message["role"]
543
- content = message["content"]
544
-
545
- if isinstance(content, list):
546
- content_str_list = []
547
- for item in content:
548
- if item["type"] == "text":
549
- content_str_list.append(f'text: {item["text"]}')
550
- elif (item["type"] == "image_url" and
551
- not item["image_url"]["url"].startswith("data:")):
552
- content_str_list.append(f'image_url: {item["image_url"]["url"]}')
553
- content_str = ", ".join(content_str_list)
554
- formatted_messages.append(f"{role}: {content_str}")
555
- else:
556
- formatted_messages.append(f"{role}: {content}")
557
- prompt = "\n".join(formatted_messages)
558
-
559
- request_model = self._kwargs.get("model", "gpt-4o")
560
-
561
- # Calculate tokens using input prompt and aggregated response
562
- input_tokens = general_tokens(prompt)
563
- output_tokens = general_tokens(self._llmresponse)
564
-
565
- # Calculate cost of the operation
566
- cost = get_chat_model_cost(request_model,
567
- pricing_info, input_tokens,
568
- output_tokens)
569
-
570
- # Set Span attributes (OTel Semconv)
571
- self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
572
- self._span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
573
- SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
574
- self._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
575
- SemanticConvention.GEN_AI_SYSTEM_OPENAI)
576
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
577
- request_model)
578
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED,
579
- str(self._kwargs.get("seed", "")))
580
- self._span.set_attribute(SemanticConvention.SERVER_PORT,
581
- self._server_port)
582
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
583
- str(self._kwargs.get("frequency_penalty", 0.0)))
584
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
585
- self._kwargs.get("max_tokens", -1))
586
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
587
- str(self._kwargs.get("presence_penalty", 0.0)))
588
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
589
- str(self._kwargs.get("stop", [])))
590
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
591
- str(self._kwargs.get("temperature", 1.0)))
592
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
593
- str(self._kwargs.get("top_p", 1.0)))
594
- self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
595
- [self._finish_reason])
596
- self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
597
- self._response_id)
598
- self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
599
- self._response_model)
600
- self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
601
- input_tokens)
602
- self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
603
- output_tokens)
604
- self._span.set_attribute(SemanticConvention.SERVER_ADDRESS,
605
- self._server_address)
606
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SERVICE_TIER,
607
- str(self._kwargs.get("service_tier", "auto")))
608
- self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SERVICE_TIER,
609
- self._openai_response_service_tier)
610
- self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
611
- self._openai_system_fingerprint)
612
- if isinstance(self._llmresponse, str):
613
- self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
614
- "text")
615
- else:
616
- self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
617
- "json")
618
-
619
- # Set Span attributes (Extra)
620
- self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
621
- environment)
622
- self._span.set_attribute(SERVICE_NAME,
623
- application_name)
624
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
625
- str(self._kwargs.get("user", "")))
626
- self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
627
- True)
628
- self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
629
- input_tokens + output_tokens)
630
- self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
631
- cost)
632
- self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
633
- self._tbt)
634
- self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
635
- self._ttft)
636
- self._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
637
- version)
638
- if capture_message_content:
639
- self._span.add_event(
640
- name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
641
- attributes={
642
- SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
643
- },
210
+ with self._span:
211
+ process_streaming_response_response(
212
+ self,
213
+ pricing_info=pricing_info,
214
+ environment=environment,
215
+ application_name=application_name,
216
+ metrics=metrics,
217
+ capture_message_content=capture_message_content,
218
+ disable_metrics=disable_metrics,
219
+ version=version
644
220
  )
645
- self._span.add_event(
646
- name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
647
- attributes={
648
- SemanticConvention.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
649
- },
650
- )
651
- self._span.set_status(Status(StatusCode.OK))
652
-
653
- if disable_metrics is False:
654
- attributes = create_metrics_attributes(
655
- service_name=application_name,
656
- deployment_environment=environment,
657
- operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
658
- system=SemanticConvention.GEN_AI_SYSTEM_OPENAI,
659
- request_model=request_model,
660
- server_address=self._server_address,
661
- server_port=self._server_port,
662
- response_model=self._response_model,
663
- )
664
-
665
- metrics["genai_client_usage_tokens"].record(
666
- input_tokens + output_tokens, attributes
667
- )
668
- metrics["genai_client_operation_duration"].record(
669
- self._end_time - self._start_time, attributes
670
- )
671
- metrics["genai_server_tbt"].record(
672
- self._tbt, attributes
673
- )
674
- metrics["genai_server_ttft"].record(
675
- self._ttft, attributes
676
- )
677
- metrics["genai_requests"].add(1, attributes)
678
- metrics["genai_completion_tokens"].add(output_tokens, attributes)
679
- metrics["genai_prompt_tokens"].add(input_tokens, attributes)
680
- metrics["genai_cost"].record(cost, attributes)
681
-
682
221
  except Exception as e:
683
222
  handle_exception(self._span, e)
684
- logger.error("Error in trace creation: %s", e)
685
- finally:
686
- self._span.end()
687
223
  raise
688
224
 
689
225
  def wrapper(wrapped, instance, args, kwargs):
690
226
  """
691
- Wraps the 'chat.completions' API call to add telemetry.
692
-
693
- This collects metrics such as execution time, cost, and token usage, and handles errors
694
- gracefully, adding details to the trace for observability.
695
-
696
- Args:
697
- wrapped: The original 'chat.completions' method to be wrapped.
698
- instance: The instance of the class where the original method is defined.
699
- args: Positional arguments for the 'chat.completions' method.
700
- kwargs: Keyword arguments for the 'chat.completions' method.
701
-
702
- Returns:
703
- The response from the original 'chat.completions' method.
227
+ Wraps the OpenAI responses API call.
704
228
  """
705
229
 
706
- # Check if streaming is enabled for the API call
707
230
  streaming = kwargs.get("stream", False)
708
231
  server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
709
232
  request_model = kwargs.get("model", "gpt-4o")
710
233
 
711
234
  span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
712
235
 
713
- # pylint: disable=no-else-return
714
236
  if streaming:
715
- # Special handling for streaming response to accommodate the nature of data flow
716
237
  awaited_wrapped = wrapped(*args, **kwargs)
717
238
  span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
718
239
 
719
- return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
240
+ return TracedSyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
720
241
 
721
- # Handling for non-streaming responses
722
242
  else:
723
- with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
243
+ with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
724
244
  start_time = time.time()
725
245
  response = wrapped(*args, **kwargs)
726
- end_time = time.time()
727
-
728
- response_dict = response_as_dict(response)
729
246
 
730
247
  try:
731
- # Format 'messages' into a single string
732
- message_prompt = kwargs.get("messages", "")
733
- formatted_messages = []
734
- for message in message_prompt:
735
- role = message["role"]
736
- content = message["content"]
737
-
738
- if isinstance(content, list):
739
- content_str = ", ".join(
740
- f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
741
- if "type" in item else f'text: {item["text"]}'
742
- for item in content
743
- )
744
- formatted_messages.append(f"{role}: {content_str}")
745
- else:
746
- formatted_messages.append(f"{role}: {content}")
747
- prompt = "\n".join(formatted_messages)
748
-
749
- input_tokens = response_dict.get('usage').get('prompt_tokens')
750
- output_tokens = response_dict.get('usage').get('completion_tokens')
751
-
752
- # Calculate cost of the operation
753
- cost = get_chat_model_cost(request_model,
754
- pricing_info, input_tokens,
755
- output_tokens)
756
-
757
- # Set base span attribues (OTel Semconv)
758
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
759
- span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
760
- SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
761
- span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
762
- SemanticConvention.GEN_AI_SYSTEM_OPENAI)
763
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
764
- request_model)
765
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED,
766
- str(kwargs.get("seed", "")))
767
- span.set_attribute(SemanticConvention.SERVER_PORT,
768
- server_port)
769
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
770
- str(kwargs.get("frequency_penalty", 0.0)))
771
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
772
- str(kwargs.get("max_tokens", -1)))
773
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
774
- str(kwargs.get("presence_penalty", 0.0)))
775
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
776
- str(kwargs.get("stop", [])))
777
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
778
- str(kwargs.get("temperature", 1.0)))
779
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
780
- str(kwargs.get("top_p", 1.0)))
781
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
782
- response_dict.get("id"))
783
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
784
- response_dict.get('model'))
785
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
786
- input_tokens)
787
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
788
- output_tokens)
789
- span.set_attribute(SemanticConvention.SERVER_ADDRESS,
790
- server_address)
791
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SERVICE_TIER,
792
- str(kwargs.get("service_tier", "auto")))
793
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SERVICE_TIER,
794
- response_dict.get('service_tier', 'auto'))
795
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
796
- str(response_dict.get('system_fingerprint', '')))
797
-
798
- # Set base span attribues (Extras)
799
- span.set_attribute(DEPLOYMENT_ENVIRONMENT,
800
- environment)
801
- span.set_attribute(SERVICE_NAME,
802
- application_name)
803
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
804
- kwargs.get("user", ""))
805
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
806
- False)
807
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
808
- input_tokens + output_tokens)
809
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
810
- cost)
811
- span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
812
- end_time - start_time)
813
- span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
814
- version)
815
- if capture_message_content:
816
- span.add_event(
817
- name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
818
- attributes={
819
- SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
820
- },
821
- )
822
-
823
- for i in range(kwargs.get('n',1)):
824
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
825
- [response_dict.get('choices')[i].get('finish_reason')])
826
- if capture_message_content:
827
- span.add_event(
828
- name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
829
- attributes={
830
- # pylint: disable=line-too-long
831
- SemanticConvention.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
832
- },
833
- )
834
- if kwargs.get('tools'):
835
- span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALLS,
836
- str(response_dict.get('choices')[i].get('message').get('tool_calls')))
837
-
838
- if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
839
- span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
840
- "text")
841
- elif response_dict.get('choices')[i].get('message').get('content') is not None:
842
- span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
843
- "json")
844
-
845
- span.set_status(Status(StatusCode.OK))
846
-
847
- if disable_metrics is False:
848
- attributes = create_metrics_attributes(
849
- service_name=application_name,
850
- deployment_environment=environment,
851
- operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
852
- system=SemanticConvention.GEN_AI_SYSTEM_OPENAI,
248
+ response = process_response_response(
249
+ response=response,
853
250
  request_model=request_model,
854
- server_address=server_address,
251
+ pricing_info=pricing_info,
855
252
  server_port=server_port,
856
- response_model=response_dict.get('model'),
857
- )
858
-
859
- metrics["genai_client_usage_tokens"].record(
860
- input_tokens + output_tokens, attributes
861
- )
862
- metrics["genai_client_operation_duration"].record(
863
- end_time - start_time, attributes
864
- )
865
- metrics["genai_server_ttft"].record(
866
- end_time - start_time, attributes
867
- )
868
- metrics["genai_requests"].add(1, attributes)
869
- metrics["genai_completion_tokens"].add(output_tokens, attributes)
870
- metrics["genai_prompt_tokens"].add(input_tokens, attributes)
871
- metrics["genai_cost"].record(cost, attributes)
872
-
873
- # Return original response
874
- return response
253
+ server_address=server_address,
254
+ environment=environment,
255
+ application_name=application_name,
256
+ metrics=metrics,
257
+ start_time=start_time,
258
+ span=span,
259
+ capture_message_content=capture_message_content,
260
+ disable_metrics=disable_metrics,
261
+ version=version,
262
+ **kwargs
263
+ )
875
264
 
876
265
  except Exception as e:
877
266
  handle_exception(span, e)
878
- logger.error("Error in trace creation: %s", e)
879
267
 
880
- # Return original response
881
- return response
268
+ return response
882
269
 
883
270
  return wrapper
884
271
 
885
- def chat_completions_parse(version, environment, application_name, tracer, pricing_info, capture_message_content,
886
- metrics, disable_metrics):
272
+ def chat_completions_parse(version, environment, application_name, tracer, pricing_info,
273
+ capture_message_content, metrics, disable_metrics):
887
274
  """
888
- Generates a telemetry wrapper for chat completions parse to collect metrics.
889
-
890
- Args:
891
- version: Version of the monitoring package.
892
- environment: Deployment environment (e.g., production, staging).
893
- application_name: Name of the application using the OpenAI API.
894
- tracer: OpenTelemetry tracer for creating spans.
895
- pricing_info: Information used for calculating the cost of OpenAI usage.
896
- capture_message_content: Flag indicating whether to trace the actual content.
897
-
898
- Returns:
899
- A function that wraps the chat completions parse method to add telemetry.
275
+ Generates a telemetry wrapper for OpenAI chat completions parse.
900
276
  """
901
277
 
902
278
  def wrapper(wrapped, instance, args, kwargs):
903
279
  """
904
- Wraps the 'chat.completions.parse' API call to add telemetry.
905
-
906
- This collects metrics such as execution time, cost, and token usage, and handles errors
907
- gracefully, adding details to the trace for observability.
908
-
909
- Args:
910
- wrapped: The original 'chat.completions' method to be wrapped.
911
- instance: The instance of the class where the original method is defined.
912
- args: Positional arguments for the 'chat.completions' method.
913
- kwargs: Keyword arguments for the 'chat.completions' method.
914
-
915
- Returns:
916
- The response from the original 'chat.completions.parse' method.
280
+ Wraps the OpenAI chat completions parse call.
917
281
  """
282
+
918
283
  server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
919
284
  request_model = kwargs.get("model", "gpt-4o")
285
+
920
286
  span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
921
287
 
922
288
  with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
923
289
  start_time = time.time()
924
- try:
925
- # Execute the original 'parse' method
926
- response = wrapped(*args, **kwargs)
927
- end_time = time.time()
928
-
929
- response_dict = response_as_dict(response)
930
-
931
- # Format 'messages' from kwargs to calculate input tokens
932
- message_prompt = kwargs.get("messages", "")
933
- formatted_messages = []
934
- for message in message_prompt:
935
- role = message.get("role")
936
- content = message.get("content")
937
- if content:
938
- formatted_messages.append(f"{role}: {content}")
939
- prompt = "\n".join(formatted_messages)
940
-
941
- input_tokens = response_dict.get('usage').get('prompt_tokens')
942
- output_tokens = response_dict.get('usage').get('completion_tokens')
943
-
944
- # Calculate cost
945
- cost = get_chat_model_cost(request_model,
946
- pricing_info, input_tokens,
947
- output_tokens)
948
-
949
- # Set base span attribues (OTel Semconv)
950
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
951
- span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
952
- span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_OPENAI)
953
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
954
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED, str(kwargs.get("seed", "")))
955
- span.set_attribute(SemanticConvention.SERVER_PORT, server_port)
956
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
957
- str(kwargs.get("frequency_penalty", 0.0)))
958
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, str(kwargs.get("max_tokens", -1)))
959
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
960
- str(kwargs.get("presence_penalty", 0.0)))
961
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, str(kwargs.get("stop", [])))
962
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, str(kwargs.get("temperature", 1.0)))
963
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, str(kwargs.get("top_p", 1.0)))
964
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, response_dict.get("id"))
965
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, response_dict.get('model'))
966
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
967
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
968
- span.set_attribute(SemanticConvention.SERVER_ADDRESS, server_address)
969
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SERVICE_TIER,
970
- str(kwargs.get("service_tier", "auto")))
971
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SERVICE_TIER,
972
- response_dict.get('service_tier', 'auto'))
973
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
974
- str(response_dict.get('system_fingerprint', '')))
975
-
976
- # Set base span attribues (Extras)
977
- span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
978
- span.set_attribute(SERVICE_NAME, application_name)
979
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER, kwargs.get("user", ""))
980
- span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
981
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, False)
982
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS, input_tokens + output_tokens)
983
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
984
- span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, end_time - start_time)
985
-
986
- if capture_message_content:
987
- span.add_event(
988
- name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
989
- attributes={SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt},
990
- )
290
+ response = wrapped(*args, **kwargs)
991
291
 
992
- for i in range(kwargs.get('n', 1)):
993
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
994
- [response_dict.get('choices')[i].get('finish_reason')])
995
- if capture_message_content:
996
- span.add_event(
997
- name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
998
- attributes={
999
- # pylint: disable=line-too-long
1000
- SemanticConvention.GEN_AI_CONTENT_COMPLETION: str(
1001
- response_dict.get('choices')[i].get('message').get('content')),
1002
- },
1003
- )
1004
- if kwargs.get('tools'):
1005
- span.set_attribute(SemanticConvention.GEN_AI_TOOL_CALLS,
1006
- str(response_dict.get('choices')[i].get('message').get('tool_calls')))
1007
-
1008
- if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
1009
- span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
1010
- "text")
1011
- elif response_dict.get('choices')[i].get('message').get('content') is not None:
1012
- span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
1013
- "json")
1014
-
1015
- span.set_status(Status(StatusCode.OK))
1016
-
1017
- if not disable_metrics:
1018
- attributes = create_metrics_attributes(
1019
- service_name=application_name,
1020
- deployment_environment=environment,
1021
- operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
1022
- system=SemanticConvention.GEN_AI_SYSTEM_OPENAI,
292
+ try:
293
+ response = process_chat_response(
294
+ response=response,
1023
295
  request_model=request_model,
1024
- server_address=server_address,
296
+ pricing_info=pricing_info,
1025
297
  server_port=server_port,
1026
- response_model=response_dict.get('model'),
1027
- )
1028
- metrics["genai_client_usage_tokens"].record(input_tokens + output_tokens, attributes)
1029
- metrics["genai_client_operation_duration"].record(end_time - start_time, attributes)
1030
- metrics["genai_server_ttft"].record( end_time - start_time, attributes)
1031
- metrics["genai_requests"].add(1, attributes)
1032
- metrics["genai_completion_tokens"].add(output_tokens, attributes)
1033
- metrics["genai_prompt_tokens"].add(input_tokens, attributes)
1034
- metrics["genai_cost"].record(cost, attributes)
1035
-
1036
- return response
298
+ server_address=server_address,
299
+ environment=environment,
300
+ application_name=application_name,
301
+ metrics=metrics,
302
+ start_time=start_time,
303
+ span=span,
304
+ capture_message_content=capture_message_content,
305
+ disable_metrics=disable_metrics,
306
+ version=version,
307
+ **kwargs
308
+ )
1037
309
 
1038
310
  except Exception as e:
1039
311
  handle_exception(span, e)
1040
- logger.error("Error in 'parse' trace creation: %s", e)
1041
- # Re-raise the exception to not interfere with the application flow
1042
- raise
312
+
313
+ return response
1043
314
 
1044
315
  return wrapper
1045
316
 
1046
- def embedding(version, environment, application_name,
1047
- tracer, pricing_info, capture_message_content, metrics, disable_metrics):
317
+ def embedding(version, environment, application_name, tracer, pricing_info,
318
+ capture_message_content, metrics, disable_metrics, **kwargs):
1048
319
  """
1049
- Generates a telemetry wrapper for embeddings to collect metrics.
1050
-
1051
- Args:
1052
- version: Version of the monitoring package.
1053
- environment: Deployment environment (e.g., production, staging).
1054
- application_name: Name of the application using the OpenAI API.
1055
- tracer: OpenTelemetry tracer for creating spans.
1056
- pricing_info: Information used for calculating the cost of OpenAI usage.
1057
- capture_message_content: Flag indicating whether to trace the actual content.
1058
-
1059
- Returns:
1060
- A function that wraps the embeddings method to add telemetry.
320
+ Generates a telemetry wrapper for OpenAI embeddings.
1061
321
  """
1062
322
 
1063
323
  def wrapper(wrapped, instance, args, kwargs):
1064
324
  """
1065
- Wraps the 'embeddings' API call to add telemetry.
1066
-
1067
- This collects metrics such as execution time, cost, and token usage, and handles errors
1068
- gracefully, adding details to the trace for observability.
1069
-
1070
- Args:
1071
- wrapped: The original 'embeddings' method to be wrapped.
1072
- instance: The instance of the class where the original method is defined.
1073
- args: Positional arguments for the 'embeddings' method.
1074
- kwargs: Keyword arguments for the 'embeddings' method.
1075
-
1076
- Returns:
1077
- The response from the original 'embeddings' method.
325
+ Wraps the OpenAI embeddings call.
1078
326
  """
1079
327
 
1080
328
  server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
@@ -1082,127 +330,44 @@ def embedding(version, environment, application_name,
1082
330
 
1083
331
  span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
1084
332
 
1085
- with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
333
+ with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
1086
334
  start_time = time.time()
1087
335
  response = wrapped(*args, **kwargs)
1088
- end_time = time.time()
1089
336
 
1090
- response_dict = response_as_dict(response)
1091
337
  try:
1092
- input_tokens = response_dict.get('usage').get('prompt_tokens')
1093
-
1094
- # Calculate cost of the operation
1095
- cost = get_embed_model_cost(request_model,
1096
- pricing_info, input_tokens)
1097
-
1098
- # Set Span attributes (OTel Semconv)
1099
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
1100
- span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
1101
- SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING)
1102
- span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
1103
- SemanticConvention.GEN_AI_SYSTEM_OPENAI)
1104
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
1105
- request_model)
1106
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_ENCODING_FORMATS,
1107
- [kwargs.get('encoding_format', 'float')])
1108
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
1109
- request_model)
1110
- span.set_attribute(SemanticConvention.SERVER_ADDRESS,
1111
- server_address)
1112
- span.set_attribute(SemanticConvention.SERVER_PORT,
1113
- server_port)
1114
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
1115
- input_tokens)
1116
-
1117
- # Set Span attributes (Extras)
1118
- span.set_attribute(DEPLOYMENT_ENVIRONMENT,
1119
- environment)
1120
- span.set_attribute(SERVICE_NAME,
1121
- application_name)
1122
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
1123
- kwargs.get("user", ""))
1124
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
1125
- input_tokens)
1126
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
1127
- cost)
1128
- span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
1129
- version)
1130
-
1131
- if capture_message_content:
1132
- span.add_event(
1133
- name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
1134
- attributes={
1135
- SemanticConvention.GEN_AI_CONTENT_PROMPT: str(kwargs.get("input", "")),
1136
- },
1137
- )
1138
-
1139
- span.set_status(Status(StatusCode.OK))
1140
-
1141
- if disable_metrics is False:
1142
- attributes = create_metrics_attributes(
1143
- service_name=application_name,
1144
- deployment_environment=environment,
1145
- operation=SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
1146
- system=SemanticConvention.GEN_AI_SYSTEM_OPENAI,
338
+ response = process_embedding_response(
339
+ response=response,
1147
340
  request_model=request_model,
1148
- server_address=server_address,
341
+ pricing_info=pricing_info,
1149
342
  server_port=server_port,
1150
- response_model=request_model,
1151
- )
1152
- metrics["genai_client_usage_tokens"].record(
1153
- input_tokens, attributes
1154
- )
1155
- metrics["genai_client_operation_duration"].record(
1156
- end_time - start_time, attributes
1157
- )
1158
- metrics["genai_requests"].add(1, attributes)
1159
- metrics["genai_prompt_tokens"].add(input_tokens, attributes)
1160
- metrics["genai_cost"].record(cost, attributes)
1161
-
1162
- # Return original response
1163
- return response
343
+ server_address=server_address,
344
+ environment=environment,
345
+ application_name=application_name,
346
+ metrics=metrics,
347
+ start_time=start_time,
348
+ span=span,
349
+ capture_message_content=capture_message_content,
350
+ disable_metrics=disable_metrics,
351
+ version=version,
352
+ **kwargs
353
+ )
1164
354
 
1165
355
  except Exception as e:
1166
356
  handle_exception(span, e)
1167
- logger.error("Error in trace creation: %s", e)
1168
357
 
1169
- # Return original response
1170
- return response
358
+ return response
1171
359
 
1172
360
  return wrapper
1173
361
 
1174
- def image_generate(version, environment, application_name,
1175
- tracer, pricing_info, capture_message_content, metrics, disable_metrics):
362
+ def image_generate(version, environment, application_name, tracer, pricing_info,
363
+ capture_message_content, metrics, disable_metrics, **kwargs):
1176
364
  """
1177
- Generates a telemetry wrapper for image generation to collect metrics.
1178
-
1179
- Args:
1180
- version: Version of the monitoring package.
1181
- environment: Deployment environment (e.g., production, staging).
1182
- application_name: Name of the application using the OpenAI API.
1183
- tracer: OpenTelemetry tracer for creating spans.
1184
- pricing_info: Information used for calculating the cost of OpenAI image generation.
1185
- capture_message_content: Flag indicating whether to trace the input prompt and generated images.
1186
-
1187
- Returns:
1188
- A function that wraps the image generation method to add telemetry.
365
+ Generates a telemetry wrapper for OpenAI image generation.
1189
366
  """
1190
367
 
1191
368
  def wrapper(wrapped, instance, args, kwargs):
1192
369
  """
1193
- Wraps the 'images.generate' API call to add telemetry.
1194
-
1195
- This collects metrics such as execution time, cost, and handles errors
1196
- gracefully, adding details to the trace for observability.
1197
-
1198
- Args:
1199
- wrapped: The original 'images.generate' method to be wrapped.
1200
- instance: The instance of the class where the original method is defined.
1201
- args: Positional arguments for the 'images.generate' method.
1202
- kwargs: Keyword arguments for the 'images.generate' method.
1203
-
1204
- Returns:
1205
- The response from the original 'images.generate' method.
370
+ Wraps the OpenAI image generation call.
1206
371
  """
1207
372
 
1208
373
  server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
@@ -1210,146 +375,46 @@ def image_generate(version, environment, application_name,
1210
375
 
1211
376
  span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
1212
377
 
1213
- with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
378
+ with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
1214
379
  start_time = time.time()
1215
380
  response = wrapped(*args, **kwargs)
1216
381
  end_time = time.time()
1217
382
 
1218
- images_count = 0
1219
-
1220
383
  try:
1221
- # Find Image format
1222
- if "response_format" in kwargs and kwargs["response_format"] == "b64_json":
1223
- image = "b64_json"
1224
- else:
1225
- image = "url"
1226
-
1227
- # Calculate cost of the operation
1228
- cost = get_image_model_cost(request_model,
1229
- pricing_info, kwargs.get("size", "1024x1024"),
1230
- kwargs.get("quality", "standard"))
1231
-
1232
- for items in response.data:
1233
- # Set Span attributes (OTel Semconv)
1234
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
1235
- span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
1236
- SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE)
1237
- span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
1238
- SemanticConvention.GEN_AI_SYSTEM_OPENAI)
1239
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
1240
- request_model)
1241
- span.set_attribute(SemanticConvention.SERVER_ADDRESS,
1242
- server_address)
1243
- span.set_attribute(SemanticConvention.SERVER_PORT,
1244
- server_port)
1245
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
1246
- response.created)
1247
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
1248
- request_model)
1249
- span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
1250
- "image")
1251
-
1252
- # Set Span attributes (Extras)
1253
- span.set_attribute(DEPLOYMENT_ENVIRONMENT,
1254
- environment)
1255
- span.set_attribute(SERVICE_NAME,
1256
- application_name)
1257
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IMAGE_SIZE,
1258
- kwargs.get("size", "1024x1024"))
1259
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IMAGE_QUALITY,
1260
- kwargs.get("quality", "standard"))
1261
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IMAGE_STYLE,
1262
- kwargs.get("style", "vivid"))
1263
- span.set_attribute(SemanticConvention.GEN_AI_CONTENT_REVISED_PROMPT,
1264
- items.revised_prompt if items.revised_prompt else "")
1265
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
1266
- kwargs.get("user", ""))
1267
- span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
1268
- version)
1269
-
1270
- if capture_message_content:
1271
- span.add_event(
1272
- name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
1273
- attributes={
1274
- SemanticConvention.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
1275
- },
1276
- )
1277
- attribute_name = f"{SemanticConvention.GEN_AI_RESPONSE_IMAGE}.{images_count}"
1278
- span.add_event(
1279
- name=attribute_name,
1280
- attributes={
1281
- SemanticConvention.GEN_AI_CONTENT_COMPLETION: getattr(items, image),
1282
- },
1283
- )
1284
-
1285
- images_count+=1
1286
-
1287
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
1288
- len(response.data) * cost)
1289
- span.set_status(Status(StatusCode.OK))
1290
-
1291
- if disable_metrics is False:
1292
- attributes = create_metrics_attributes(
1293
- service_name=application_name,
1294
- deployment_environment=environment,
1295
- operation=SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE,
1296
- system=SemanticConvention.GEN_AI_SYSTEM_OPENAI,
384
+ response = process_image_response(
385
+ response=response,
1297
386
  request_model=request_model,
1298
- server_address=server_address,
387
+ pricing_info=pricing_info,
1299
388
  server_port=server_port,
1300
- response_model=request_model,
1301
- )
1302
-
1303
- metrics["genai_client_operation_duration"].record(
1304
- end_time - start_time, attributes
1305
- )
1306
- metrics["genai_requests"].add(1, attributes)
1307
- metrics["genai_cost"].record(cost, attributes)
1308
-
1309
- # Return original response
1310
- return response
389
+ server_address=server_address,
390
+ environment=environment,
391
+ application_name=application_name,
392
+ metrics=metrics,
393
+ start_time=start_time,
394
+ end_time=end_time,
395
+ span=span,
396
+ capture_message_content=capture_message_content,
397
+ disable_metrics=disable_metrics,
398
+ version=version,
399
+ **kwargs
400
+ )
1311
401
 
1312
402
  except Exception as e:
1313
403
  handle_exception(span, e)
1314
- logger.error("Error in trace creation: %s", e)
1315
404
 
1316
- # Return original response
1317
- return response
405
+ return response
1318
406
 
1319
407
  return wrapper
1320
408
 
1321
- def image_variatons(version, environment, application_name,
1322
- tracer, pricing_info, capture_message_content, metrics, disable_metrics):
409
+ def image_variatons(version, environment, application_name, tracer, pricing_info,
410
+ capture_message_content, metrics, disable_metrics):
1323
411
  """
1324
- Generates a telemetry wrapper for creating image variations to collect metrics.
1325
-
1326
- Args:
1327
- version: Version of the monitoring package.
1328
- environment: Deployment environment (e.g., production, staging).
1329
- application_name: Name of the application using the OpenAI API.
1330
- tracer: OpenTelemetry tracer for creating spans.
1331
- pricing_info: Information used for calculating the cost of generating image variations.
1332
- capture_message_content: Flag indicating whether to trace the input image and generated variations.
1333
-
1334
- Returns:
1335
- A function that wraps the image variations creation method to add telemetry.
412
+ Generates a telemetry wrapper for OpenAI image variations.
1336
413
  """
1337
414
 
1338
415
  def wrapper(wrapped, instance, args, kwargs):
1339
416
  """
1340
- Wraps the 'images.create.variations' API call to add telemetry.
1341
-
1342
- This collects metrics such as execution time, cost, and handles errors
1343
- gracefully, adding details to the trace for observability.
1344
-
1345
- Args:
1346
- wrapped: The original 'images.create.variations' method to be wrapped.
1347
- instance: The instance of the class where the original method is defined.
1348
- args: Positional arguments for the method.
1349
- kwargs: Keyword arguments for the method.
1350
-
1351
- Returns:
1352
- The response from the original 'images.create.variations' method.
417
+ Wraps the OpenAI image variations call.
1353
418
  """
1354
419
 
1355
420
  server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
@@ -1357,135 +422,46 @@ def image_variatons(version, environment, application_name,
1357
422
 
1358
423
  span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
1359
424
 
1360
- with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
425
+ with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
1361
426
  start_time = time.time()
1362
427
  response = wrapped(*args, **kwargs)
1363
428
  end_time = time.time()
1364
429
 
1365
- images_count = 0
1366
-
1367
430
  try:
1368
- # Find Image format
1369
- if "response_format" in kwargs and kwargs["response_format"] == "b64_json":
1370
- image = "b64_json"
1371
- else:
1372
- image = "url"
1373
-
1374
- # Calculate cost of the operation
1375
- cost = get_image_model_cost(request_model, pricing_info,
1376
- kwargs.get("size", "1024x1024"), "standard")
1377
-
1378
- for items in response.data:
1379
- # Set Span attributes (OTel Semconv)
1380
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
1381
- span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
1382
- SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE)
1383
- span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
1384
- SemanticConvention.GEN_AI_SYSTEM_OPENAI)
1385
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
1386
- request_model)
1387
- span.set_attribute(SemanticConvention.SERVER_ADDRESS,
1388
- server_address)
1389
- span.set_attribute(SemanticConvention.SERVER_PORT,
1390
- server_port)
1391
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
1392
- response.created)
1393
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
1394
- request_model)
1395
- span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
1396
- "image")
1397
-
1398
- # Set Span attributes (Extras)
1399
- span.set_attribute(DEPLOYMENT_ENVIRONMENT,
1400
- environment)
1401
- span.set_attribute(SERVICE_NAME,
1402
- application_name)
1403
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IMAGE_SIZE,
1404
- kwargs.get("size", "1024x1024"))
1405
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IMAGE_QUALITY,
1406
- "standard")
1407
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_USER,
1408
- kwargs.get("user", ""))
1409
- span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
1410
- version)
1411
-
1412
- if capture_message_content:
1413
- attribute_name = f"{SemanticConvention.GEN_AI_RESPONSE_IMAGE}.{images_count}"
1414
- span.add_event(
1415
- name=attribute_name,
1416
- attributes={
1417
- SemanticConvention.GEN_AI_CONTENT_COMPLETION: getattr(items, image),
1418
- },
1419
- )
1420
-
1421
- images_count+=1
1422
-
1423
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
1424
- len(response.data) * cost)
1425
- span.set_status(Status(StatusCode.OK))
1426
-
1427
- if disable_metrics is False:
1428
- attributes = create_metrics_attributes(
1429
- service_name=application_name,
1430
- deployment_environment=environment,
1431
- operation=SemanticConvention.GEN_AI_OPERATION_TYPE_IMAGE,
1432
- system=SemanticConvention.GEN_AI_SYSTEM_OPENAI,
431
+ response = process_image_response(
432
+ response=response,
1433
433
  request_model=request_model,
1434
- server_address=server_address,
434
+ pricing_info=pricing_info,
1435
435
  server_port=server_port,
1436
- response_model=request_model,
1437
- )
1438
-
1439
- metrics["genai_client_operation_duration"].record(
1440
- end_time - start_time, attributes
1441
- )
1442
- metrics["genai_requests"].add(1, attributes)
1443
- metrics["genai_cost"].record(cost, attributes)
1444
-
1445
- # Return original response
1446
- return response
436
+ server_address=server_address,
437
+ environment=environment,
438
+ application_name=application_name,
439
+ metrics=metrics,
440
+ start_time=start_time,
441
+ end_time=end_time,
442
+ span=span,
443
+ capture_message_content=capture_message_content,
444
+ disable_metrics=disable_metrics,
445
+ version=version,
446
+ **kwargs
447
+ )
1447
448
 
1448
449
  except Exception as e:
1449
450
  handle_exception(span, e)
1450
- logger.error("Error in trace creation: %s", e)
1451
451
 
1452
- # Return original response
1453
- return response
452
+ return response
1454
453
 
1455
454
  return wrapper
1456
455
 
1457
- def audio_create(version, environment, application_name,
1458
- tracer, pricing_info, capture_message_content, metrics, disable_metrics):
456
+ def audio_create(version, environment, application_name, tracer, pricing_info,
457
+ capture_message_content, metrics, disable_metrics):
1459
458
  """
1460
- Generates a telemetry wrapper for creating speech audio to collect metrics.
1461
-
1462
- Args:
1463
- version: Version of the monitoring package.
1464
- environment: Deployment environment (e.g., production, staging).
1465
- application_name: Name of the application using the OpenAI API.
1466
- tracer: OpenTelemetry tracer for creating spans.
1467
- pricing_info: Information used for calculating the cost of generating speech audio.
1468
- capture_message_content: Flag indicating whether to trace the input text and generated audio.
1469
-
1470
- Returns:
1471
- A function that wraps the speech audio creation method to add telemetry.
459
+ Generates a telemetry wrapper for OpenAI audio creation.
1472
460
  """
1473
461
 
1474
462
  def wrapper(wrapped, instance, args, kwargs):
1475
463
  """
1476
- Wraps the 'audio.speech.create' API call to add telemetry.
1477
-
1478
- This collects metrics such as execution time, cost, and handles errors
1479
- gracefully, adding details to the trace for observability.
1480
-
1481
- Args:
1482
- wrapped: The original 'audio.speech.create' method to be wrapped.
1483
- instance: The instance of the class where the original method is defined.
1484
- args: Positional arguments for the 'audio.speech.create' method.
1485
- kwargs: Keyword arguments for the 'audio.speech.create' method.
1486
-
1487
- Returns:
1488
- The response from the original 'audio.speech.create' method.
464
+ Wraps the OpenAI audio creation call.
1489
465
  """
1490
466
 
1491
467
  server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
@@ -1499,77 +475,27 @@ def audio_create(version, environment, application_name,
1499
475
  end_time = time.time()
1500
476
 
1501
477
  try:
1502
- # Calculate cost of the operation
1503
- cost = get_audio_model_cost(request_model,
1504
- pricing_info, kwargs.get("input", ""))
1505
-
1506
- # Set Span attributes
1507
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
1508
- span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
1509
- SemanticConvention.GEN_AI_OPERATION_TYPE_AUDIO)
1510
- span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
1511
- SemanticConvention.GEN_AI_SYSTEM_OPENAI)
1512
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
1513
- request_model)
1514
- span.set_attribute(SemanticConvention.SERVER_ADDRESS,
1515
- server_address)
1516
- span.set_attribute(SemanticConvention.SERVER_PORT,
1517
- server_port)
1518
- span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
1519
- request_model)
1520
- span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
1521
- "speech")
1522
-
1523
- span.set_attribute(DEPLOYMENT_ENVIRONMENT,
1524
- environment)
1525
- span.set_attribute(SERVICE_NAME,
1526
- application_name)
1527
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_AUDIO_VOICE,
1528
- kwargs.get("voice", "alloy"))
1529
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_AUDIO_RESPONSE_FORMAT,
1530
- kwargs.get("response_format", "mp3"))
1531
- span.set_attribute(SemanticConvention.GEN_AI_REQUEST_AUDIO_SPEED,
1532
- kwargs.get("speed", 1))
1533
- span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
1534
- cost)
1535
- span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
1536
- version)
1537
- if capture_message_content:
1538
- span.add_event(
1539
- name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
1540
- attributes={
1541
- SemanticConvention.GEN_AI_CONTENT_PROMPT: kwargs.get("input", ""),
1542
- },
1543
- )
1544
-
1545
- span.set_status(Status(StatusCode.OK))
1546
-
1547
- if disable_metrics is False:
1548
- attributes = create_metrics_attributes(
1549
- service_name=application_name,
1550
- deployment_environment=environment,
1551
- operation=SemanticConvention.GEN_AI_OPERATION_TYPE_AUDIO,
1552
- system=SemanticConvention.GEN_AI_SYSTEM_OPENAI,
478
+ response = process_audio_response(
479
+ response=response,
1553
480
  request_model=request_model,
1554
- server_address=server_address,
481
+ pricing_info=pricing_info,
1555
482
  server_port=server_port,
1556
- response_model=request_model,
1557
- )
1558
-
1559
- metrics["genai_client_operation_duration"].record(
1560
- end_time - start_time, attributes
1561
- )
1562
- metrics["genai_requests"].add(1, attributes)
1563
- metrics["genai_cost"].record(cost, attributes)
1564
-
1565
- # Return original response
1566
- return response
483
+ server_address=server_address,
484
+ environment=environment,
485
+ application_name=application_name,
486
+ metrics=metrics,
487
+ start_time=start_time,
488
+ end_time=end_time,
489
+ span=span,
490
+ capture_message_content=capture_message_content,
491
+ disable_metrics=disable_metrics,
492
+ version=version,
493
+ **kwargs
494
+ )
1567
495
 
1568
496
  except Exception as e:
1569
497
  handle_exception(span, e)
1570
- logger.error("Error in trace creation: %s", e)
1571
498
 
1572
- # Return original response
1573
- return response
499
+ return response
1574
500
 
1575
501
  return wrapper