openlit 1.33.7__py3-none-any.whl → 1.33.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. openlit/__helpers.py +83 -0
  2. openlit/__init__.py +1 -1
  3. openlit/instrumentation/ag2/ag2.py +2 -2
  4. openlit/instrumentation/ai21/__init__.py +4 -4
  5. openlit/instrumentation/ai21/ai21.py +370 -319
  6. openlit/instrumentation/ai21/async_ai21.py +371 -319
  7. openlit/instrumentation/anthropic/__init__.py +4 -4
  8. openlit/instrumentation/anthropic/anthropic.py +321 -189
  9. openlit/instrumentation/anthropic/async_anthropic.py +323 -190
  10. openlit/instrumentation/assemblyai/__init__.py +1 -1
  11. openlit/instrumentation/assemblyai/assemblyai.py +59 -43
  12. openlit/instrumentation/astra/astra.py +4 -4
  13. openlit/instrumentation/astra/async_astra.py +4 -4
  14. openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
  15. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
  16. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
  17. openlit/instrumentation/bedrock/__init__.py +1 -1
  18. openlit/instrumentation/bedrock/bedrock.py +115 -58
  19. openlit/instrumentation/chroma/chroma.py +4 -4
  20. openlit/instrumentation/cohere/__init__.py +33 -10
  21. openlit/instrumentation/cohere/async_cohere.py +610 -0
  22. openlit/instrumentation/cohere/cohere.py +410 -219
  23. openlit/instrumentation/controlflow/controlflow.py +2 -2
  24. openlit/instrumentation/crawl4ai/async_crawl4ai.py +2 -2
  25. openlit/instrumentation/crawl4ai/crawl4ai.py +2 -2
  26. openlit/instrumentation/crewai/crewai.py +2 -2
  27. openlit/instrumentation/dynamiq/dynamiq.py +2 -2
  28. openlit/instrumentation/elevenlabs/async_elevenlabs.py +73 -47
  29. openlit/instrumentation/elevenlabs/elevenlabs.py +73 -52
  30. openlit/instrumentation/embedchain/embedchain.py +4 -4
  31. openlit/instrumentation/firecrawl/firecrawl.py +2 -2
  32. openlit/instrumentation/google_ai_studio/__init__.py +9 -9
  33. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
  34. openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
  35. openlit/instrumentation/gpt4all/gpt4all.py +17 -17
  36. openlit/instrumentation/groq/async_groq.py +14 -14
  37. openlit/instrumentation/groq/groq.py +14 -14
  38. openlit/instrumentation/haystack/haystack.py +2 -2
  39. openlit/instrumentation/julep/async_julep.py +2 -2
  40. openlit/instrumentation/julep/julep.py +2 -2
  41. openlit/instrumentation/langchain/langchain.py +36 -31
  42. openlit/instrumentation/letta/letta.py +6 -6
  43. openlit/instrumentation/litellm/async_litellm.py +20 -20
  44. openlit/instrumentation/litellm/litellm.py +20 -20
  45. openlit/instrumentation/llamaindex/llamaindex.py +2 -2
  46. openlit/instrumentation/mem0/mem0.py +2 -2
  47. openlit/instrumentation/milvus/milvus.py +4 -4
  48. openlit/instrumentation/mistral/async_mistral.py +18 -18
  49. openlit/instrumentation/mistral/mistral.py +18 -18
  50. openlit/instrumentation/multion/async_multion.py +2 -2
  51. openlit/instrumentation/multion/multion.py +2 -2
  52. openlit/instrumentation/ollama/async_ollama.py +29 -29
  53. openlit/instrumentation/ollama/ollama.py +29 -29
  54. openlit/instrumentation/openai/__init__.py +11 -230
  55. openlit/instrumentation/openai/async_openai.py +434 -409
  56. openlit/instrumentation/openai/openai.py +415 -393
  57. openlit/instrumentation/phidata/phidata.py +2 -2
  58. openlit/instrumentation/pinecone/pinecone.py +4 -4
  59. openlit/instrumentation/premai/premai.py +20 -20
  60. openlit/instrumentation/qdrant/async_qdrant.py +4 -4
  61. openlit/instrumentation/qdrant/qdrant.py +4 -4
  62. openlit/instrumentation/reka/async_reka.py +6 -6
  63. openlit/instrumentation/reka/reka.py +6 -6
  64. openlit/instrumentation/together/async_together.py +18 -18
  65. openlit/instrumentation/together/together.py +18 -18
  66. openlit/instrumentation/transformers/transformers.py +6 -6
  67. openlit/instrumentation/vertexai/async_vertexai.py +53 -53
  68. openlit/instrumentation/vertexai/vertexai.py +53 -53
  69. openlit/instrumentation/vllm/vllm.py +6 -6
  70. openlit/otel/metrics.py +98 -7
  71. openlit/semcov/__init__.py +113 -80
  72. {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/METADATA +2 -1
  73. openlit-1.33.9.dist-info/RECORD +121 -0
  74. {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/WHEEL +1 -1
  75. openlit/instrumentation/openai/async_azure_openai.py +0 -900
  76. openlit/instrumentation/openai/azure_openai.py +0 -898
  77. openlit-1.33.7.dist-info/RECORD +0 -122
  78. {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/LICENSE +0 -0
openlit/instrumentation/ai21/ai21.py
@@ -1,15 +1,19 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches, too-many-instance-attributes, inconsistent-return-statements
 """
 Module for monitoring AI21 calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
     get_chat_model_cost,
     handle_exception,
     response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port,
     general_tokens
 )
 from openlit.semcov import SemanticConvetion
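The imports above pull four new helpers from openlit/__helpers.py (extended by 83 lines in this release). Their bodies are not part of this diff; judging from the call sites below, `calculate_ttft(self._timestamps, self._start_time)` and `calculate_tbt(self._timestamps)`, they plausibly reduce the list of chunk-arrival timestamps like this sketch (an illustration, not the shipped code):

```python
from typing import List

def calculate_ttft(timestamps: List[float], start_time: float) -> float:
    """Time to first token: delay between sending the request and the first chunk."""
    if not timestamps:
        return 0.0
    return timestamps[0] - start_time

def calculate_tbt(timestamps: List[float]) -> float:
    """Average time between tokens: mean gap between consecutive chunks."""
    if len(timestamps) < 2:
        return 0.0
    gaps = [later - earlier for earlier, later in zip(timestamps, timestamps[1:])]
    return sum(gaps) / len(gaps)
```

The other two helpers, `create_metrics_attributes` and `set_server_address_and_port`, are sketched where they are first used below.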
@@ -17,13 +21,12 @@ from openlit.semcov import SemanticConvetion
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def chat(gen_ai_endpoint, version, environment, application_name,
+def chat(version, environment, application_name,
          tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the AI21 SDK.
@@ -38,6 +41,7 @@ def chat(gen_ai_endpoint, version, environment, application_name,
     class TracedSyncStream:
         """
         Wrapper for streaming responses to collect metrics and trace data.
+        Wraps the 'ai21.AsyncStream' response to collect message IDs and aggregated response.
 
         This class implements the '__aiter__' and '__anext__' methods that
         handle asynchronous streaming responses.
@@ -50,6 +54,8 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                 wrapped,
                 span,
                 kwargs,
+                server_address,
+                server_port,
                 **args,
             ):
             self.__wrapped__ = wrapped
@@ -57,11 +63,19 @@ def chat(gen_ai_endpoint, version, environment, application_name,
             # Placeholder for aggregating streaming response
             self._llmresponse = ""
             self._response_id = ""
-            self._prompt_tokens = 0
-            self._completion_tokens = 0
+            self._finish_reason = ""
+            self._input_tokens = 0
+            self._output_tokens = 0
 
             self._args = args
             self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
 
         def __enter__(self):
             self.__wrapped__.__enter__()
@@ -80,8 +94,15 @@ def chat(gen_ai_endpoint, version, environment, application_name,
         def __next__(self):
             try:
                 chunk = self.__wrapped__.__next__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
                 chunked = response_as_dict(chunk)
-                # Collect message IDs and aggregated response from events
                 if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
                     'content' in chunked.get('choices')[0].get('delta'))):
 
@@ -90,14 +111,19 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                         self._llmresponse += content
 
                 if chunked.get('usage'):
-                    self._prompt_tokens = chunked.get('usage').get("prompt_tokens")
-                    self._completion_tokens = chunked.get('usage').get("completion_tokens")
+                    self._input_tokens = chunked.get('usage').get("prompt_tokens")
+                    self._output_tokens = chunked.get('usage').get("completion_tokens")
 
                 self._response_id = chunked.get('id')
+                self._finish_reason = chunked.get('choices')[0].get('finish_reason')
                 return chunk
             except StopIteration:
                 # Handling exception ensure observability without disrupting operation
                 try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                     # Format 'messages' into a single string
                     message_prompt = self._kwargs.get("messages", "")
                     formatted_messages = []
@@ -107,7 +133,6 @@ def chat(gen_ai_endpoint, version, environment, application_name,
 
                         if isinstance(content, list):
                             content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                 if "type" in item else f'text: {item["text"]}'
                                 for item in content
@@ -117,43 +142,74 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
+                    request_model = self._kwargs.get("model", "jamba-1.5-mini")
+
                     # Calculate cost of the operation
-                    cost = get_chat_model_cost(self._kwargs.get("model", "jamba-1.5-mini"),
-                                pricing_info, self._prompt_tokens,
-                                self._completion_tokens)
+                    cost = get_chat_model_cost(request_model,
+                                pricing_info, self._input_tokens,
+                                self._output_tokens)
 
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                 SemanticConvetion.GEN_AI_SYSTEM_AI21)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                gen_ai_endpoint)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                self._response_id)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                environment)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                application_name)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                self._kwargs.get("model", "jamba-1.5-mini"))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                self._kwargs.get("top_p", 1.0))
+                                request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                self._kwargs.get("frequency_penalty", 0.0))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
                                 self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                self._kwargs.get("stop", []))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                self._kwargs.get("temperature", 1.0))
+                                self._kwargs.get("temperature", 0.4))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                [self._finish_reason])
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                self._response_id)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                self._input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                self._output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                self._server_address)
+
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                environment)
+                    self._span.set_attribute(SERVICE_NAME,
+                                application_name)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                 True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                self._prompt_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                self._completion_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                self._prompt_tokens + self._completion_tokens)
+                                self._input_tokens + self._output_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                 cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                version)
                     if trace_content:
                         self._span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -167,31 +223,35 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                             },
                         )
-
                     self._span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_AI21,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                self._kwargs.get("model", "jamba-1.5-mini")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=request_model,
+                        )
 
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(
-                            self._prompt_tokens + self._completion_tokens, attributes
+                        metrics["genai_client_usage_tokens"].record(
+                            self._input_tokens + self._output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
                         )
-                        metrics["genai_completion_tokens"].add(self._completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(self._prompt_tokens, attributes)
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                 except Exception as e:
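`create_metrics_attributes` replaces the hand-built attribute dict each instrumentation previously assembled. Only its keyword arguments appear in this diff, so the following is a sketch of the dictionary it plausibly returns; the string keys are assumptions modeled on the OTel GenAI semantic conventions, while the real helper lives in openlit/__helpers.py:

```python
from opentelemetry.sdk.resources import (
    SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT,
)

def create_metrics_attributes(service_name, deployment_environment, operation,
                              system, request_model, server_address,
                              server_port, response_model):
    """Build the attribute set shared by every GenAI metric data point."""
    return {
        TELEMETRY_SDK_NAME: "openlit",
        SERVICE_NAME: service_name,
        DEPLOYMENT_ENVIRONMENT: deployment_environment,
        "gen_ai.operation.name": operation,       # assumed key
        "gen_ai.system": system,                  # assumed key
        "gen_ai.request.model": request_model,    # assumed key
        "server.address": server_address,
        "server.port": server_port,
        "gen_ai.response.model": response_model,  # assumed key
    }
```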
@@ -220,20 +280,25 @@ def chat(gen_ai_endpoint, version, environment, application_name,
 
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
+        request_model = kwargs.get("model", "jamba-1.5-mini")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
         # pylint: disable=no-else-return
         if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
             awaited_wrapped = wrapped(*args, **kwargs)
-            span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-            return TracedSyncStream(awaited_wrapped, span, kwargs)
+            return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
 
         # Handling for non-streaming responses
         else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                 response = wrapped(*args, **kwargs)
+                end_time = time.time()
 
                 response_dict = response_as_dict(response)
 
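Two things change at this call site. The span is now named "{operation} {model}" (for example "chat jamba-1.5-mini") instead of the removed gen_ai_endpoint label, and `set_server_address_and_port` resolves where the client actually points, with api.ai21.com:443 as the fallback. Its body is not in this diff; a minimal sketch, assuming the SDK client exposes a base_url-like attribute (the attribute lookup is hypothetical):

```python
from urllib.parse import urlparse

def set_server_address_and_port(instance, default_address, default_port):
    """Derive (server.address, server.port) from the SDK client, with fallbacks."""
    base_url = getattr(instance, "base_url", None)  # hypothetical attribute
    if base_url:
        parsed = urlparse(str(base_url))
        if parsed.hostname:
            return parsed.hostname, parsed.port or default_port
    return default_address, default_port
```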
@@ -247,7 +312,6 @@ def chat(gen_ai_endpoint, version, environment, application_name,
 
                         if isinstance(content, list):
                             content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                 if "type" in item else f'text: {item["text"]}'
                                 for item in content
@@ -257,30 +321,64 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
-                    # Set base span attribues
+                    input_tokens = response_dict.get('usage').get('prompt_tokens')
+                    output_tokens = response_dict.get('usage').get('completion_tokens')
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(request_model,
+                                pricing_info, input_tokens,
+                                output_tokens)
+
+                    # Set base span attribues (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                 SemanticConvetion.GEN_AI_SYSTEM_AI21)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                response_dict.get("id"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                application_name)
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                kwargs.get("model", "jamba-1.5-mini"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                kwargs.get("top_p", 1.0))
+                                request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                kwargs.get("seed", ""))
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                server_port)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                kwargs.get("frequency_penalty", 0.0))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
                                 kwargs.get("max_tokens", -1))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                kwargs.get("presence_penalty", 0.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                kwargs.get("stop", []))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                kwargs.get("temperature", 1.0))
+                                kwargs.get("temperature", 0.4))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                kwargs.get("top_p", 1.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                response_dict.get("id"))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                input_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                output_tokens)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                server_address)
+
+                    # Set base span attribues (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                environment)
+                    span.set_attribute(SERVICE_NAME,
+                                application_name)
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                 False)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                input_tokens + output_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                cost)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                end_time - start_time)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                version)
                     if trace_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -289,93 +387,54 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                             },
                         )
 
-                    # Set span attributes when tools is not passed to the function call
-                    if "tools" not in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "jamba-1.5-mini"),
-                                    pricing_info, response_dict.get('usage', {}).get('prompt_tokens', None),
-                                    response_dict.get('usage', {}).get('completion_tokens', None))
-
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                    response_dict.get('usage', {}).get('prompt_tokens', None))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                    response_dict.get('usage', {}).get('completion_tokens', None))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                    response_dict.get('usage', {}).get('total_tokens', None))
+                    for i in range(kwargs.get('n',1)):
                         span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                    [response_dict.get('choices', [])[0].get('finish_reason', None)])
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                    cost)
-
-                        # Set span attributes for when n = 1 (default)
-                        if "n" not in kwargs or kwargs["n"] == 1:
-                            if trace_content:
-                                span.add_event(
-                                    name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices', [])[0].get("message").get("content"),
-                                    },
-                                )
-
-                        # Set span attributes for when n > 0
-                        else:
-                            i = 0
-                            while i < kwargs["n"] and trace_content is True:
-                                attribute_name = f"gen_ai.content.completion.{i}"
-                                span.add_event(
-                                    name=attribute_name,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                                    },
-                                )
-                                i += 1
-
-                        # Return original response
-                        return response
-
-                    # Set span attributes when tools is passed to the function call
-                    elif "tools" in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "jamba-1.5-mini"),
-                                    pricing_info, response_dict.get('usage').get('prompt_tokens'),
-                                    response_dict.get('usage').get('completion_tokens'))
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
-                            },
-                        )
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                    response_dict.get('usage').get('prompt_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                    response_dict.get('usage').get('completion_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                    response_dict.get('usage').get('total_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                    cost)
+                                    [response_dict.get('choices')[i].get('finish_reason')])
+                        if trace_content:
+                            span.add_event(
+                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                                attributes={
+                                    # pylint: disable=line-too-long
+                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+                                },
+                            )
+                        if kwargs.get('tools'):
+                            span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                    str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+                        if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                    "text")
+                        elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                    "json")
 
                     span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_AI21,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "jamba-1.5-mini")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
+                            request_model=request_model,
+                            server_address=server_address,
+                            server_port=server_port,
+                            response_model=request_model,
+                        )
 
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            end_time - start_time, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            end_time - start_time, attributes
+                        )
                         metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(response_dict.get('usage').get('total_tokens'), attributes)
-                        metrics["genai_completion_tokens"].add(response_dict.get('usage').get('completion_tokens'), attributes)
-                        metrics["genai_prompt_tokens"].add(response_dict.get('usage').get('prompt_tokens'), attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                 # Return original response
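The non-streaming path now feeds the same new instruments as the streaming one (`genai_client_usage_tokens`, `genai_client_operation_duration`, `genai_server_ttft`, plus `genai_server_tbt` for streams), which lines up with the 98 lines added to openlit/otel/metrics.py. A sketch of how such histograms could be registered with an OpenTelemetry meter; the dictionary keys come from this diff, but the instrument names, units, and descriptions are assumptions:

```python
from opentelemetry import metrics

meter = metrics.get_meter("openlit", version="1.33.9")

metrics_dict = {
    "genai_client_usage_tokens": meter.create_histogram(
        name="gen_ai.client.token.usage",            # assumed instrument name
        description="Tokens processed per operation",
        unit="{token}",
    ),
    "genai_client_operation_duration": meter.create_histogram(
        name="gen_ai.client.operation.duration",     # assumed instrument name
        description="Duration of the client operation",
        unit="s",
    ),
    "genai_server_tbt": meter.create_histogram(
        name="gen_ai.server.time_per_output_token",  # assumed instrument name
        description="Mean time between streamed chunks",
        unit="s",
    ),
    "genai_server_ttft": meter.create_histogram(
        name="gen_ai.server.time_to_first_token",    # assumed instrument name
        description="Time to the first streamed chunk",
        unit="s",
    ),
}
```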
@@ -390,13 +449,12 @@ def chat(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def chat_rag(gen_ai_endpoint, version, environment, application_name,
+def chat_rag(version, environment, application_name,
              tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the AI21 SDK.
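Because gen_ai_endpoint is gone from both factories, the registration in openlit/instrumentation/ai21/__init__.py (a 4-line change in this release) must also pass one argument fewer. OpenLIT wires its wrappers with wrapt; a sketch of what the new wiring plausibly looks like, with the target module and method path left as placeholders since the __init__.py hunks are not shown here:

```python
from wrapt import wrap_function_wrapper

def instrument_ai21(version, environment, application_name, tracer,
                    pricing_info, trace_content, metrics, disable_metrics):
    # Hypothetical target path; the real one is defined in
    # openlit/instrumentation/ai21/__init__.py, not shown in this diff.
    wrap_function_wrapper(
        "ai21.clients.studio.resources.chat",  # assumed module
        "ChatCompletions.create",              # assumed attribute path
        chat(version, environment, application_name,
             tracer, pricing_info, trace_content, metrics, disable_metrics),
    )
```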
@@ -425,180 +483,173 @@ def chat_rag(gen_ai_endpoint, version, environment, application_name,
             The response from the original 'chat.completions' method.
         """
 
-        # Check if streaming is enabled for the API call
-        streaming = kwargs.get("stream", False)
-
-        # pylint: disable=no-else-return
-        if streaming:
-            # # Special handling for streaming response to accommodate the nature of data flow
-            # awaited_wrapped = wrapped(*args, **kwargs)
-            # span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
-
-            # return TracedSyncStream(awaited_wrapped, span, kwargs)
-
-            return
-
-        # Handling for non-streaming responses
-        else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                response = wrapped(*args, **kwargs)
-
-                response_dict = response_as_dict(response)
+        server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
+        request_model = kwargs.get("model", "jamba-1.5-mini")
 
-                try:
-                    # Format 'messages' into a single string
-                    message_prompt = kwargs.get("messages", "")
-                    formatted_messages = []
-                    for message in message_prompt:
-                        role = message.role
-                        content = message.content
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
-                        if isinstance(content, list):
-                            content_str = ", ".join(
-                                # pylint: disable=line-too-long
-                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                if "type" in item else f'text: {item["text"]}'
-                                for item in content
-                            )
-                            formatted_messages.append(f"{role}: {content_str}")
-                        else:
-                            formatted_messages.append(f"{role}: {content}")
-                        prompt = "\n".join(formatted_messages)
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
+            response = wrapped(*args, **kwargs)
+            end_time = time.time()
 
-                    # Set base span attribues
-                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                SemanticConvetion.GEN_AI_SYSTEM_AI21)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                response_dict.get("id"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                kwargs.get("model", "jamba-1.5-mini"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                False)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_SEGMENTS,
-                                kwargs.get("max_segments", -1))
-                    span.set_attribute(SemanticConvetion.GEN_AI_RAG_STRATEGY,
-                                kwargs.get("retrieval_strategy", "segments"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_RAG_SIMILARITY_THRESHOLD,
-                                kwargs.get("retrieval_similarity_threshold", -1))
-                    span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_NEIGHBORS,
-                                kwargs.get("max_neighbors", -1))
-                    span.set_attribute(SemanticConvetion.GEN_AI_RAG_FILE_IDS,
-                                str(kwargs.get("file_ids", "")))
-                    span.set_attribute(SemanticConvetion.GEN_AI_RAG_DOCUMENTS_PATH,
-                                kwargs.get("path", ""))
+            response_dict = response_as_dict(response)
 
-                    if trace_content:
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
+            try:
+                # Format 'messages' into a single string
+                message_prompt = kwargs.get("messages", "")
+                formatted_messages = []
+                for message in message_prompt:
+                    role = message.role
+                    content = message.content
+
+                    if isinstance(content, list):
+                        content_str = ", ".join(
+                            f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                            if "type" in item else f'text: {item["text"]}'
+                            for item in content
                         )
-                    prompt_tokens = general_tokens(prompt)
-
-
-                    # Set span attributes when tools is not passed to the function call
-                    if "tools" not in kwargs:
-                        prompt_tokens = general_tokens(prompt)
-
-                        # Set span attributes for when n = 1 (default)
-                        if "n" not in kwargs or kwargs["n"] == 1:
-                            completion_tokens = general_tokens(response_dict.get('choices', [])[0].get("content"))
-                            if trace_content:
-                                span.add_event(
-                                    name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices', [])[0].get("content"),
-                                    },
-                                )
+                        formatted_messages.append(f"{role}: {content_str}")
+                    else:
+                        formatted_messages.append(f"{role}: {content}")
+                prompt = "\n".join(formatted_messages)
+
+                input_tokens = general_tokens(prompt)
+
+                # Set base span attribues (OTel Semconv)
+                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                            SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                            SemanticConvetion.GEN_AI_SYSTEM_AI21)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                            request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                            kwargs.get("seed", ""))
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                            server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                            kwargs.get("frequency_penalty", 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                            kwargs.get("max_tokens", -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                            kwargs.get("presence_penalty", 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                            kwargs.get("stop", []))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                            kwargs.get("temperature", 0.4))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                            kwargs.get("top_p", 1.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                            response_dict.get("id"))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                            request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                            input_tokens)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                            server_address)
+
+                # Set base span attribues (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                            environment)
+                span.set_attribute(SERVICE_NAME,
+                            application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                            False)
+                span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                            end_time - start_time)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                            version)
+                span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_SEGMENTS,
+                            kwargs.get("max_segments", -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_RAG_STRATEGY,
+                            kwargs.get("retrieval_strategy", "segments"))
+                span.set_attribute(SemanticConvetion.GEN_AI_RAG_SIMILARITY_THRESHOLD,
+                            kwargs.get("retrieval_similarity_threshold", -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_NEIGHBORS,
+                            kwargs.get("max_neighbors", -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_RAG_FILE_IDS,
+                            str(kwargs.get("file_ids", "")))
+                span.set_attribute(SemanticConvetion.GEN_AI_RAG_DOCUMENTS_PATH,
+                            kwargs.get("path", ""))
+                if trace_content:
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+
+                output_tokens = 0
+                for i in range(kwargs.get('n',1)):
+                    output_tokens += general_tokens(response_dict.get('choices')[i].get('content'))
 
-                        # Set span attributes for when n > 0
-                        else:
-                            i = 0
-                            completion_tokens = 0
-                            while i < kwargs["n"] and trace_content is True:
-                                completion_tokens += general_tokens(response_dict.get('choices')[i].get("message").get("content"))
-                                attribute_name = f"gen_ai.content.completion.{i}"
-                                span.add_event(
-                                    name=attribute_name,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                                    },
-                                )
-                                i += 1
-
-                        # Return original response
-                        return response
-
-                    # Set span attributes when tools is passed to the function call
-                    elif "tools" in kwargs:
-                        completion_tokens = -1
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "jamba-1.5-mini"),
-                                    pricing_info, response_dict.get('usage').get('prompt_tokens'),
-                                    response_dict.get('usage').get('completion_tokens'))
+                    if trace_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                             attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
+                                # pylint: disable=line-too-long
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('content')),
                             },
                         )
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                    prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                    completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                    prompt_tokens + completion_tokens)
-
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "jamba-1.5-mini"),
-                                pricing_info, prompt_tokens,
-                                completion_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                cost)
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_AI21,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "jamba-1.5-mini")
-                        }
-
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(prompt_tokens + completion_tokens, attributes)
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
-
-                    # Return original response
-                    return response
-
-                except Exception as e:
-                    handle_exception(span, e)
-                    logger.error("Error in trace creation: %s", e)
-
-                    # Return original response
-                    return response
+                    if kwargs.get('tools'):
+                        span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+                    if isinstance(response_dict.get('choices')[i].get('content'), str):
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                "text")
+                    elif response_dict.get('choices')[i].get('content') is not None:
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                "json")
+
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(request_model,
+                            pricing_info, input_tokens,
+                            output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                            cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                            output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                            input_tokens + output_tokens)
+
+                span.set_status(Status(StatusCode.OK))
+
+                if disable_metrics is False:
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
+
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens + output_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics["genai_server_ttft"].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics["genai_requests"].add(1, attributes)
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                    metrics["genai_cost"].record(cost, attributes)
+
+                # Return original response
+                return response
+
+            except Exception as e:
+                handle_exception(span, e)
+                logger.error("Error in trace creation: %s", e)
+
+                # Return original response
+                return response
 
     return wrapper
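End to end, none of this changes the public surface: initializing OpenLIT is still enough to get the renamed spans and the new TTFT/TBT telemetry from AI21 calls. A minimal usage sketch (argument values are illustrative):

```python
import openlit
from ai21 import AI21Client
from ai21.models.chat import ChatMessage

openlit.init(application_name="demo-app", environment="staging")

client = AI21Client()
response = client.chat.completions.create(
    model="jamba-1.5-mini",
    messages=[ChatMessage(role="user", content="Say hello")],
)
# With the changes above, this call plausibly produces a span named
# "chat jamba-1.5-mini" carrying token usage, cost, and TTFT attributes.
```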