openlit 1.29.1__py3-none-any.whl → 1.30.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,8 +6,15 @@ Module for monitoring OpenAI API calls.
6
6
  import logging
7
7
  from opentelemetry.trace import SpanKind, Status, StatusCode
8
8
  from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
9
- from openlit.__helpers import get_chat_model_cost, get_embed_model_cost, get_audio_model_cost
10
- from openlit.__helpers import get_image_model_cost, openai_tokens, handle_exception
9
+ from openlit.__helpers import (
10
+ get_chat_model_cost,
11
+ get_embed_model_cost,
12
+ get_audio_model_cost,
13
+ get_image_model_cost,
14
+ openai_tokens,
15
+ handle_exception,
16
+ response_as_dict,
17
+ )
11
18
  from openlit.semcov import SemanticConvetion
12
19
 
13
20
  # Initialize logger for logging potential issues and operations
@@ -31,10 +38,184 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
31
38
  A function that wraps the chat completions method to add telemetry.
32
39
  """
33
40
 
41
class TracedAsyncStream:
    """
    Wrapper for streaming responses to collect metrics and trace data.
    Wraps the 'openai.AsyncStream' response to collect message IDs and aggregated response.

    This class implements the '__aiter__' and '__anext__' methods that
    handle asynchronous streaming responses.

    This class also implements '__aenter__' and '__aexit__' methods that
    handle asynchronous context management protocol.

    The telemetry helpers and settings used below ('response_as_dict',
    'openai_tokens', 'get_chat_model_cost', 'handle_exception', 'logger',
    'pricing_info', 'trace_content', 'metrics', 'disable_metrics',
    'gen_ai_endpoint', 'environment', 'application_name') are not defined by
    this class; they are expected to be in scope where the class is declared.

    Args:
        wrapped: The underlying async stream whose chunks are forwarded.
        span: A manually started span; this wrapper is responsible for ending it.
        kwargs: The keyword arguments of the original chat completions request.
        **args: Extra keyword arguments, stored as-is and otherwise unused.
    """

    def __init__(
            self,
            wrapped,
            span,
            kwargs,
            **args,
        ):
        self.__wrapped__ = wrapped
        self._span = span
        # Placeholder for aggregating streaming response
        self._llmresponse = ""
        self._response_id = ""
        # Guards against ending the span (and recording telemetry) twice
        self._span_ended = False

        self._args = args
        self._kwargs = kwargs

    async def __aenter__(self):
        await self.__wrapped__.__aenter__()
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        try:
            await self.__wrapped__.__aexit__(exc_type, exc_value, traceback)
        finally:
            # The consumer may leave the context before exhausting the stream;
            # without this the manually started span would never be ended.
            self._end_span()

    def __aiter__(self):
        return self

    def __getattr__(self, name):
        """Delegate attribute access to the wrapped object."""
        # '__getattr__' must be synchronous: an 'async def' here would hand the
        # caller a coroutine instead of the attribute value.
        if name == "__wrapped__":
            # Not set yet (e.g. instance created without '__init__'); fail
            # cleanly instead of recursing into '__getattr__' forever.
            raise AttributeError(name)
        return getattr(self.__wrapped__, name)

    async def __anext__(self):
        try:
            chunk = await self.__wrapped__.__anext__()
        except StopAsyncIteration:
            # Stream exhausted: record telemetry once, close the span, and let
            # the iteration protocol see the original StopAsyncIteration.
            if not self._span_ended:
                self._record_telemetry()
                self._end_span()
            raise
        except Exception as e:
            # The stream failed mid-flight: note it on the span and close the
            # span so it is not leaked, then surface the error unchanged.
            if not self._span_ended:
                handle_exception(self._span, e)
                self._end_span()
            raise

        self._collect_chunk(chunk)
        return chunk

    def _end_span(self):
        """End the span exactly once."""
        if not self._span_ended:
            self._span_ended = True
            self._span.end()

    def _collect_chunk(self, chunk):
        """Accumulate the response text and response ID from one streamed chunk."""
        # Handling exception ensures observability without disrupting operation:
        # a chunk that cannot be inspected must still reach the caller.
        try:
            # presumably returns a plain dict view of the chunk - helper is
            # imported from openlit.__helpers and not visible here
            chunked = response_as_dict(chunk)
            # 'choices' and 'delta' may be missing or None on some chunks
            choices = chunked.get('choices') or []
            if choices:
                delta = choices[0].get('delta') or {}
                content = delta.get('content')
                if content:
                    self._llmresponse += content
            # Keep the last known ID when a chunk does not carry one
            self._response_id = chunked.get('id') or self._response_id
        except Exception as e:
            logger.error("Error in trace creation: %s", e)

    def _format_prompt(self):
        """Format the request 'messages' into a single string."""
        formatted_messages = []
        for message in self._kwargs.get("messages", ""):
            role = message["role"]
            content = message["content"]

            if isinstance(content, list):
                content_str = ", ".join(
                    # pylint: disable=line-too-long
                    f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                    if "type" in item else f'text: {item["text"]}'
                    for item in content
                )
                formatted_messages.append(f"{role}: {content_str}")
            else:
                formatted_messages.append(f"{role}: {content}")
        return "\n".join(formatted_messages)

    def _record_telemetry(self):
        """Set span attributes/events and record metrics for the finished stream."""
        # Handling exception ensures observability without disrupting operation
        try:
            prompt = self._format_prompt()
            model = self._kwargs.get("model", "gpt-3.5-turbo")

            # Calculate tokens using input prompt and aggregated response
            prompt_tokens = openai_tokens(prompt, model)
            completion_tokens = openai_tokens(self._llmresponse, model)

            # Calculate cost of the operation
            cost = get_chat_model_cost(model, pricing_info,
                                       prompt_tokens, completion_tokens)

            # Set Span attributes
            span = self._span
            span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
            span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                               SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
            span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
                               SemanticConvetion.GEN_AI_TYPE_CHAT)
            span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
                               gen_ai_endpoint)
            span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                               self._response_id)
            span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
                               environment)
            span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
                               application_name)
            span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                               model)
            span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                               self._kwargs.get("user", ""))
            span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
                               self._kwargs.get("top_p", 1.0))
            span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
                               self._kwargs.get("max_tokens", -1))
            span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
                               self._kwargs.get("temperature", 1.0))
            span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
                               self._kwargs.get("presence_penalty", 0.0))
            span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
                               self._kwargs.get("frequency_penalty", 0.0))
            span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
                               self._kwargs.get("seed", ""))
            span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                               True)
            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
                               prompt_tokens)
            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
                               completion_tokens)
            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
                               prompt_tokens + completion_tokens)
            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                               cost)
            if trace_content:
                span.add_event(
                    name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                    attributes={
                        SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
                    },
                )
                span.add_event(
                    name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                    attributes={
                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                    },
                )

            span.set_status(Status(StatusCode.OK))

            if disable_metrics is False:
                attributes = {
                    TELEMETRY_SDK_NAME:
                        "openlit",
                    SemanticConvetion.GEN_AI_APPLICATION_NAME:
                        application_name,
                    SemanticConvetion.GEN_AI_SYSTEM:
                        SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
                    SemanticConvetion.GEN_AI_ENVIRONMENT:
                        environment,
                    SemanticConvetion.GEN_AI_TYPE:
                        SemanticConvetion.GEN_AI_TYPE_CHAT,
                    SemanticConvetion.GEN_AI_REQUEST_MODEL:
                        model
                }

                metrics["genai_requests"].add(1, attributes)
                metrics["genai_total_tokens"].add(
                    prompt_tokens + completion_tokens, attributes
                )
                metrics["genai_completion_tokens"].add(completion_tokens, attributes)
                metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
                metrics["genai_cost"].record(cost, attributes)

        except Exception as e:
            handle_exception(self._span, e)
            logger.error("Error in trace creation: %s", e)
214
+
34
215
  async def wrapper(wrapped, instance, args, kwargs):
35
216
  """
36
217
  Wraps the 'chat.completions' API call to add telemetry.
37
-
218
+
38
219
  This collects metrics such as execution time, cost, and token usage, and handles errors
39
220
  gracefully, adding details to the trace for observability.
40
221
 
@@ -54,140 +235,10 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
54
235
  # pylint: disable=no-else-return
55
236
  if streaming:
56
237
  # Special handling for streaming response to accommodate the nature of data flow
57
- async def stream_generator():
58
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
59
- # Placeholder for aggregating streaming response
60
- llmresponse = ""
61
-
62
- # Loop through streaming events capturing relevant details
63
- async for chunk in await wrapped(*args, **kwargs):
64
- # Collect message IDs and aggregated response from events
65
- if len(chunk.choices) > 0:
66
- # pylint: disable=line-too-long
67
- if hasattr(chunk.choices[0], "delta") and hasattr(chunk.choices[0].delta, "content"):
68
- content = chunk.choices[0].delta.content
69
- if content:
70
- llmresponse += content
71
- yield chunk
72
- response_id = chunk.id
73
-
74
- # Handling exception ensure observability without disrupting operation
75
- try:
76
- # Format 'messages' into a single string
77
- message_prompt = kwargs.get("messages", "")
78
- formatted_messages = []
79
- for message in message_prompt:
80
- role = message["role"]
81
- content = message["content"]
82
-
83
- if isinstance(content, list):
84
- content_str = ", ".join(
85
- # pylint: disable=line-too-long
86
- f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
87
- if "type" in item else f'text: {item["text"]}'
88
- for item in content
89
- )
90
- formatted_messages.append(f"{role}: {content_str}")
91
- else:
92
- formatted_messages.append(f"{role}: {content}")
93
- prompt = "\n".join(formatted_messages)
94
-
95
- # Calculate tokens using input prompt and aggregated response
96
- prompt_tokens = openai_tokens(prompt,
97
- kwargs.get("model", "gpt-3.5-turbo"))
98
- completion_tokens = openai_tokens(llmresponse,
99
- kwargs.get("model", "gpt-3.5-turbo"))
238
+ awaited_wrapped = await wrapped(*args, **kwargs)
239
+ span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
100
240
 
101
- # Calculate cost of the operation
102
- cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
103
- pricing_info, prompt_tokens,
104
- completion_tokens)
105
-
106
- # Set Span attributes
107
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
108
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
109
- SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
110
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
111
- SemanticConvetion.GEN_AI_TYPE_CHAT)
112
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
113
- gen_ai_endpoint)
114
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
115
- response_id)
116
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
117
- environment)
118
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
119
- application_name)
120
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
121
- kwargs.get("model", "gpt-3.5-turbo"))
122
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
123
- kwargs.get("user", ""))
124
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
125
- kwargs.get("top_p", 1.0))
126
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
127
- kwargs.get("max_tokens", -1))
128
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
129
- kwargs.get("temperature", 1.0))
130
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
131
- kwargs.get("presence_penalty", 0.0))
132
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
133
- kwargs.get("frequency_penalty", 0.0))
134
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
135
- kwargs.get("seed", ""))
136
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
137
- True)
138
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
139
- prompt_tokens)
140
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
141
- completion_tokens)
142
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
143
- prompt_tokens + completion_tokens)
144
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
145
- cost)
146
- if trace_content:
147
- span.add_event(
148
- name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
149
- attributes={
150
- SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
151
- },
152
- )
153
- span.add_event(
154
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
155
- attributes={
156
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
157
- },
158
- )
159
-
160
- span.set_status(Status(StatusCode.OK))
161
-
162
- if disable_metrics is False:
163
- attributes = {
164
- TELEMETRY_SDK_NAME:
165
- "openlit",
166
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
167
- application_name,
168
- SemanticConvetion.GEN_AI_SYSTEM:
169
- SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
170
- SemanticConvetion.GEN_AI_ENVIRONMENT:
171
- environment,
172
- SemanticConvetion.GEN_AI_TYPE:
173
- SemanticConvetion.GEN_AI_TYPE_CHAT,
174
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
175
- kwargs.get("model", "gpt-3.5-turbo")
176
- }
177
-
178
- metrics["genai_requests"].add(1, attributes)
179
- metrics["genai_total_tokens"].add(
180
- prompt_tokens + completion_tokens, attributes
181
- )
182
- metrics["genai_completion_tokens"].add(completion_tokens, attributes)
183
- metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
184
- metrics["genai_cost"].record(cost, attributes)
185
-
186
- except Exception as e:
187
- handle_exception(span, e)
188
- logger.error("Error in trace creation: %s", e)
189
-
190
- return stream_generator()
241
+ return TracedAsyncStream(awaited_wrapped, span, kwargs)
191
242
 
192
243
  # Handling for non-streaming responses
193
244
  else:
@@ -195,6 +246,8 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
195
246
  with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
196
247
  response = await wrapped(*args, **kwargs)
197
248
 
249
+ response_dict = response_as_dict(response)
250
+
198
251
  try:
199
252
  # Format 'messages' into a single string
200
253
  message_prompt = kwargs.get("messages", "")
@@ -224,7 +277,7 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
224
277
  span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
225
278
  gen_ai_endpoint)
226
279
  span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
227
- response.id)
280
+ response_dict.get("id"))
228
281
  span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
229
282
  environment)
230
283
  span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
@@ -255,23 +308,21 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
255
308
  },
256
309
  )
257
310
 
258
- span.set_status(Status(StatusCode.OK))
259
-
260
311
  # Set span attributes when tools is not passed to the function call
261
312
  if "tools" not in kwargs:
262
313
  # Calculate cost of the operation
263
314
  cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
264
- pricing_info, response.usage.prompt_tokens,
265
- response.usage.completion_tokens)
315
+ pricing_info, response_dict.get('usage', {}).get('prompt_tokens', None),
316
+ response_dict.get('usage', {}).get('completion_tokens', None))
266
317
 
267
318
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
268
- response.usage.prompt_tokens)
319
+ response_dict.get('usage', {}).get('prompt_tokens', None))
269
320
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
270
- response.usage.completion_tokens)
321
+ response_dict.get('usage', {}).get('completion_tokens', None))
271
322
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
272
- response.usage.total_tokens)
323
+ response_dict.get('usage', {}).get('total_tokens', None))
273
324
  span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
274
- [response.choices[0].finish_reason])
325
+ [response_dict.get('choices', [])[0].get('finish_reason', None)])
275
326
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
276
327
  cost)
277
328
 
@@ -281,7 +332,7 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
281
332
  span.add_event(
282
333
  name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
283
334
  attributes={
284
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.choices[0].message.content,
335
+ SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices', [])[0].get("message").get("content"),
285
336
  },
286
337
  )
287
338
 
@@ -293,7 +344,7 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
293
344
  span.add_event(
294
345
  name=attribute_name,
295
346
  attributes={
296
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.choices[i].message.content,
347
+ SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
297
348
  },
298
349
  )
299
350
  i += 1
@@ -305,9 +356,8 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
305
356
  elif "tools" in kwargs:
306
357
  # Calculate cost of the operation
307
358
  cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
308
- pricing_info, response.usage.prompt_tokens,
309
- response.usage.completion_tokens)
310
-
359
+ pricing_info, response_dict.get('usage').get('prompt_tokens'),
360
+ response_dict.get('usage').get('completion_tokens'))
311
361
  span.add_event(
312
362
  name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
313
363
  attributes={
@@ -315,11 +365,11 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
315
365
  },
316
366
  )
317
367
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
318
- response.usage.prompt_tokens)
368
+ response_dict.get('usage').get('prompt_tokens'))
319
369
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
320
- response.usage.completion_tokens)
370
+ response_dict.get('usage').get('completion_tokens'))
321
371
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
322
- response.usage.total_tokens)
372
+ response_dict.get('usage').get('total_tokens'))
323
373
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
324
374
  cost)
325
375
 
@@ -342,9 +392,9 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
342
392
  }
343
393
 
344
394
  metrics["genai_requests"].add(1, attributes)
345
- metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
346
- metrics["genai_completion_tokens"].add(response.usage.completion_tokens, attributes)
347
- metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
395
+ metrics["genai_total_tokens"].add(response_dict.get('usage').get('total_tokens'), attributes)
396
+ metrics["genai_completion_tokens"].add(response_dict.get('usage').get('completion_tokens'), attributes)
397
+ metrics["genai_prompt_tokens"].add(response_dict.get('usage').get('prompt_tokens'), attributes)
348
398
  metrics["genai_cost"].record(cost, attributes)
349
399
 
350
400
  # Return original response
@@ -363,7 +413,7 @@ def async_embedding(gen_ai_endpoint, version, environment, application_name,
363
413
  tracer, pricing_info, trace_content, metrics, disable_metrics):
364
414
  """
365
415
  Generates a telemetry wrapper for embeddings to collect metrics.
366
-
416
+
367
417
  Args:
368
418
  gen_ai_endpoint: Endpoint identifier for logging and tracing.
369
419
  version: Version of the monitoring package.
@@ -372,7 +422,7 @@ def async_embedding(gen_ai_endpoint, version, environment, application_name,
372
422
  tracer: OpenTelemetry tracer for creating spans.
373
423
  pricing_info: Information used for calculating the cost of OpenAI usage.
374
424
  trace_content: Flag indicating whether to trace the actual content.
375
-
425
+
376
426
  Returns:
377
427
  A function that wraps the embeddings method to add telemetry.
378
428
  """
@@ -418,8 +468,8 @@ def async_embedding(gen_ai_endpoint, version, environment, application_name,
418
468
  kwargs.get("model", "text-embedding-ada-002"))
419
469
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
420
470
  kwargs.get("encoding_format", "float"))
421
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
422
- kwargs.get("dimensions", ""))
471
+ # span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
472
+ # kwargs.get("dimensions", "null"))
423
473
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
424
474
  kwargs.get("user", ""))
425
475
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
@@ -475,7 +525,7 @@ def async_finetune(gen_ai_endpoint, version, environment, application_name,
475
525
  tracer, pricing_info, trace_content, metrics, disable_metrics):
476
526
  """
477
527
  Generates a telemetry wrapper for fine-tuning jobs to collect metrics.
478
-
528
+
479
529
  Args:
480
530
  gen_ai_endpoint: Endpoint identifier for logging and tracing.
481
531
  version: Version of the monitoring package.
@@ -484,7 +534,7 @@ def async_finetune(gen_ai_endpoint, version, environment, application_name,
484
534
  tracer: OpenTelemetry tracer for creating spans.
485
535
  pricing_info: Information used for calculating the cost of OpenAI usage.
486
536
  trace_content: Flag indicating whether to trace the actual content.
487
-
537
+
488
538
  Returns:
489
539
  A function that wraps the fine tuning creation method to add telemetry.
490
540
  """
@@ -509,13 +559,14 @@ def async_finetune(gen_ai_endpoint, version, environment, application_name,
509
559
  with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
510
560
  response = await wrapped(*args, **kwargs)
511
561
 
562
+ # Handling exception ensure observability without disrupting operation
512
563
  try:
513
564
  # Set Span attributes
514
565
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
515
566
  span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
516
567
  SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
517
568
  span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
518
- "fine_tuning")
569
+ SemanticConvetion.GEN_AI_TYPE_FINETUNING)
519
570
  span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
520
571
  gen_ai_endpoint)
521
572
  span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
@@ -546,7 +597,22 @@ def async_finetune(gen_ai_endpoint, version, environment, application_name,
546
597
  span.set_status(Status(StatusCode.OK))
547
598
 
548
599
  if disable_metrics is False:
549
- metrics["genai_requests"].add(1)
600
+ attributes = {
601
+ TELEMETRY_SDK_NAME:
602
+ "openlit",
603
+ SemanticConvetion.GEN_AI_APPLICATION_NAME:
604
+ application_name,
605
+ SemanticConvetion.GEN_AI_SYSTEM:
606
+ SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
607
+ SemanticConvetion.GEN_AI_ENVIRONMENT:
608
+ environment,
609
+ SemanticConvetion.GEN_AI_TYPE:
610
+ SemanticConvetion.GEN_AI_TYPE_FINETUNING,
611
+ SemanticConvetion.GEN_AI_REQUEST_MODEL:
612
+ kwargs.get("model", "gpt-3.5-turbo")
613
+ }
614
+
615
+ metrics["genai_requests"].add(1, attributes)
550
616
 
551
617
  # Return original response
552
618
  return response
@@ -564,7 +630,7 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name
564
630
  tracer, pricing_info, trace_content, metrics, disable_metrics):
565
631
  """
566
632
  Generates a telemetry wrapper for image generation to collect metrics.
567
-
633
+
568
634
  Args:
569
635
  gen_ai_endpoint: Endpoint identifier for logging and tracing.
570
636
  version: Version of the monitoring package.
@@ -573,7 +639,7 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name
573
639
  tracer: OpenTelemetry tracer for creating spans.
574
640
  pricing_info: Information used for calculating the cost of OpenAI image generation.
575
641
  trace_content: Flag indicating whether to trace the input prompt and generated images.
576
-
642
+
577
643
  Returns:
578
644
  A function that wraps the image generation method to add telemetry.
579
645
  """
@@ -694,7 +760,7 @@ def async_image_variatons(gen_ai_endpoint, version, environment, application_nam
694
760
  tracer, pricing_info, trace_content, metrics, disable_metrics):
695
761
  """
696
762
  Generates a telemetry wrapper for creating image variations to collect metrics.
697
-
763
+
698
764
  Args:
699
765
  gen_ai_endpoint: Endpoint identifier for logging and tracing.
700
766
  version: Version of the monitoring package.
@@ -703,7 +769,7 @@ def async_image_variatons(gen_ai_endpoint, version, environment, application_nam
703
769
  tracer: OpenTelemetry tracer for creating spans.
704
770
  pricing_info: Information used for calculating the cost of generating image variations.
705
771
  trace_content: Flag indicating whether to trace the input image and generated variations.
706
-
772
+
707
773
  Returns:
708
774
  A function that wraps the image variations creation method to add telemetry.
709
775
  """
@@ -813,7 +879,7 @@ def async_audio_create(gen_ai_endpoint, version, environment, application_name,
813
879
  tracer, pricing_info, trace_content, metrics, disable_metrics):
814
880
  """
815
881
  Generates a telemetry wrapper for creating speech audio to collect metrics.
816
-
882
+
817
883
  Args:
818
884
  gen_ai_endpoint: Endpoint identifier for logging and tracing.
819
885
  version: Version of the monitoring package.
@@ -822,7 +888,7 @@ def async_audio_create(gen_ai_endpoint, version, environment, application_name,
822
888
  tracer: OpenTelemetry tracer for creating spans.
823
889
  pricing_info: Information used for calculating the cost of generating speech audio.
824
890
  trace_content: Flag indicating whether to trace the input text and generated audio.
825
-
891
+
826
892
  Returns:
827
893
  A function that wraps the speech audio creation method to add telemetry.
828
894
  """