openlit 1.33.7__py3-none-any.whl → 1.33.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. openlit/__helpers.py +83 -0
  2. openlit/__init__.py +1 -1
  3. openlit/instrumentation/ag2/ag2.py +2 -2
  4. openlit/instrumentation/ai21/__init__.py +4 -4
  5. openlit/instrumentation/ai21/ai21.py +370 -319
  6. openlit/instrumentation/ai21/async_ai21.py +371 -319
  7. openlit/instrumentation/anthropic/__init__.py +4 -4
  8. openlit/instrumentation/anthropic/anthropic.py +321 -189
  9. openlit/instrumentation/anthropic/async_anthropic.py +323 -190
  10. openlit/instrumentation/assemblyai/__init__.py +1 -1
  11. openlit/instrumentation/assemblyai/assemblyai.py +59 -43
  12. openlit/instrumentation/astra/astra.py +4 -4
  13. openlit/instrumentation/astra/async_astra.py +4 -4
  14. openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
  15. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
  16. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
  17. openlit/instrumentation/bedrock/__init__.py +1 -1
  18. openlit/instrumentation/bedrock/bedrock.py +115 -58
  19. openlit/instrumentation/chroma/chroma.py +4 -4
  20. openlit/instrumentation/cohere/__init__.py +33 -10
  21. openlit/instrumentation/cohere/async_cohere.py +610 -0
  22. openlit/instrumentation/cohere/cohere.py +410 -219
  23. openlit/instrumentation/controlflow/controlflow.py +2 -2
  24. openlit/instrumentation/crawl4ai/async_crawl4ai.py +2 -2
  25. openlit/instrumentation/crawl4ai/crawl4ai.py +2 -2
  26. openlit/instrumentation/crewai/crewai.py +2 -2
  27. openlit/instrumentation/dynamiq/dynamiq.py +2 -2
  28. openlit/instrumentation/elevenlabs/async_elevenlabs.py +73 -47
  29. openlit/instrumentation/elevenlabs/elevenlabs.py +73 -52
  30. openlit/instrumentation/embedchain/embedchain.py +4 -4
  31. openlit/instrumentation/firecrawl/firecrawl.py +2 -2
  32. openlit/instrumentation/google_ai_studio/__init__.py +9 -9
  33. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
  34. openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
  35. openlit/instrumentation/gpt4all/gpt4all.py +17 -17
  36. openlit/instrumentation/groq/async_groq.py +14 -14
  37. openlit/instrumentation/groq/groq.py +14 -14
  38. openlit/instrumentation/haystack/haystack.py +2 -2
  39. openlit/instrumentation/julep/async_julep.py +2 -2
  40. openlit/instrumentation/julep/julep.py +2 -2
  41. openlit/instrumentation/langchain/langchain.py +36 -31
  42. openlit/instrumentation/letta/letta.py +6 -6
  43. openlit/instrumentation/litellm/async_litellm.py +20 -20
  44. openlit/instrumentation/litellm/litellm.py +20 -20
  45. openlit/instrumentation/llamaindex/llamaindex.py +2 -2
  46. openlit/instrumentation/mem0/mem0.py +2 -2
  47. openlit/instrumentation/milvus/milvus.py +4 -4
  48. openlit/instrumentation/mistral/async_mistral.py +18 -18
  49. openlit/instrumentation/mistral/mistral.py +18 -18
  50. openlit/instrumentation/multion/async_multion.py +2 -2
  51. openlit/instrumentation/multion/multion.py +2 -2
  52. openlit/instrumentation/ollama/async_ollama.py +29 -29
  53. openlit/instrumentation/ollama/ollama.py +29 -29
  54. openlit/instrumentation/openai/__init__.py +11 -230
  55. openlit/instrumentation/openai/async_openai.py +434 -409
  56. openlit/instrumentation/openai/openai.py +415 -393
  57. openlit/instrumentation/phidata/phidata.py +2 -2
  58. openlit/instrumentation/pinecone/pinecone.py +4 -4
  59. openlit/instrumentation/premai/premai.py +20 -20
  60. openlit/instrumentation/qdrant/async_qdrant.py +4 -4
  61. openlit/instrumentation/qdrant/qdrant.py +4 -4
  62. openlit/instrumentation/reka/async_reka.py +6 -6
  63. openlit/instrumentation/reka/reka.py +6 -6
  64. openlit/instrumentation/together/async_together.py +18 -18
  65. openlit/instrumentation/together/together.py +18 -18
  66. openlit/instrumentation/transformers/transformers.py +6 -6
  67. openlit/instrumentation/vertexai/async_vertexai.py +53 -53
  68. openlit/instrumentation/vertexai/vertexai.py +53 -53
  69. openlit/instrumentation/vllm/vllm.py +6 -6
  70. openlit/otel/metrics.py +98 -7
  71. openlit/semcov/__init__.py +113 -80
  72. {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/METADATA +2 -1
  73. openlit-1.33.9.dist-info/RECORD +121 -0
  74. {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/WHEEL +1 -1
  75. openlit/instrumentation/openai/async_azure_openai.py +0 -900
  76. openlit/instrumentation/openai/azure_openai.py +0 -898
  77. openlit-1.33.7.dist-info/RECORD +0 -122
  78. {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/LICENSE +0 -0
openlit/instrumentation/ai21/async_ai21.py

@@ -1,15 +1,19 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches, too-many-instance-attributes, inconsistent-return-statements
 """
 Module for monitoring AI21 calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
     get_chat_model_cost,
     handle_exception,
     response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port,
     general_tokens
 )
 from openlit.semcov import SemanticConvetion
@@ -17,13 +21,12 @@ from openlit.semcov import SemanticConvetion
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def async_chat(gen_ai_endpoint, version, environment, application_name,
+def async_chat(version, environment, application_name,
                tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
        application_name: Name of the application using the AI21 SDK.
@@ -38,6 +41,7 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
     class TracedAsyncStream:
         """
         Wrapper for streaming responses to collect metrics and trace data.
+        Wraps the 'ai21.AsyncStream' response to collect message IDs and aggregated response.
 
         This class implements the '__aiter__' and '__anext__' methods that
         handle asynchronous streaming responses.
@@ -50,6 +54,8 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
                 wrapped,
                 span,
                 kwargs,
+                server_address,
+                server_port,
                 **args,
             ):
             self.__wrapped__ = wrapped
@@ -57,11 +63,19 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
             # Placeholder for aggregating streaming response
             self._llmresponse = ""
             self._response_id = ""
-            self._prompt_tokens = 0
-            self._completion_tokens = 0
+            self._finish_reason = ""
+            self._input_tokens = 0
+            self._output_tokens = 0
 
             self._args = args
             self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
 
         async def __aenter__(self):
             await self.__wrapped__.__aenter__()
@@ -80,8 +94,15 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
         async def __anext__(self):
             try:
                 chunk = await self.__wrapped__.__anext__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
                 chunked = response_as_dict(chunk)
-                # Collect message IDs and aggregated response from events
                 if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
                     'content' in chunked.get('choices')[0].get('delta'))):
 
@@ -90,14 +111,19 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
                        self._llmresponse += content
 
                    if chunked.get('usage'):
-                        self._prompt_tokens = chunked.get('usage').get("prompt_tokens")
-                        self._completion_tokens = chunked.get('usage').get("completion_tokens")
+                        self._input_tokens = chunked.get('usage').get("prompt_tokens")
+                        self._output_tokens = chunked.get('usage').get("completion_tokens")
 
                 self._response_id = chunked.get('id')
+                self._finish_reason = chunked.get('choices')[0].get('finish_reason')
                 return chunk
             except StopAsyncIteration:
                 # Handling exception ensure observability without disrupting operation
                 try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                     # Format 'messages' into a single string
                     message_prompt = self._kwargs.get("messages", "")
                     formatted_messages = []
@@ -107,7 +133,6 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
 
                         if isinstance(content, list):
                             content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                 if "type" in item else f'text: {item["text"]}'
                                 for item in content
@@ -117,43 +142,74 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
+                    request_model = self._kwargs.get("model", "jamba-1.5-mini")
+
                     # Calculate cost of the operation
-                    cost = get_chat_model_cost(self._kwargs.get("model", "jamba-1.5-mini"),
-                        pricing_info, self._prompt_tokens,
-                        self._completion_tokens)
+                    cost = get_chat_model_cost(request_model,
+                        pricing_info, self._input_tokens,
+                        self._output_tokens)
 
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                        SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                         SemanticConvetion.GEN_AI_SYSTEM_AI21)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                        SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                        gen_ai_endpoint)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                        self._response_id)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                        environment)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                        application_name)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                        self._kwargs.get("model", "jamba-1.5-mini"))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                        self._kwargs.get("top_p", 1.0))
+                        request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                        self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                        self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                        self._kwargs.get("frequency_penalty", 0.0))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
                         self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                        self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                        self._kwargs.get("stop", []))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                        self._kwargs.get("temperature", 1.0))
+                        self._kwargs.get("temperature", 0.4))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                        self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                        [self._finish_reason])
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                        self._response_id)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                        request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                        self._input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                        self._output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                        self._server_address)
+
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                            "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                            "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                        environment)
+                    self._span.set_attribute(SERVICE_NAME,
+                        application_name)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                         True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                        self._prompt_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                        self._completion_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                        self._prompt_tokens + self._completion_tokens)
+                        self._input_tokens + self._output_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                         cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                        self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                        self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                        version)
                     if trace_content:
                         self._span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -167,31 +223,35 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                             },
                         )
-
                     self._span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_AI21,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                self._kwargs.get("model", "jamba-1.5-mini")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=request_model,
+                        )
 
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(
-                            self._prompt_tokens + self._completion_tokens, attributes
+                        metrics["genai_client_usage_tokens"].record(
+                            self._input_tokens + self._output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
                         )
-                        metrics["genai_completion_tokens"].add(self._completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(self._prompt_tokens, attributes)
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                 except Exception as e:
@@ -220,19 +280,25 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
 
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
+        request_model = kwargs.get("model", "jamba-1.5-mini")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+
         # pylint: disable=no-else-return
         if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
             awaited_wrapped = await wrapped(*args, **kwargs)
-            span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-            return TracedAsyncStream(awaited_wrapped, span, kwargs)
+            return TracedAsyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
 
         # Handling for non-streaming responses
         else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                 response = await wrapped(*args, **kwargs)
+                end_time = time.time()
 
                 response_dict = response_as_dict(response)
 
@@ -246,7 +312,6 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
 
                         if isinstance(content, list):
                             content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                 if "type" in item else f'text: {item["text"]}'
                                 for item in content
@@ -256,30 +321,64 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
-                    # Set base span attribues
+                    input_tokens = response_dict.get('usage').get('prompt_tokens')
+                    output_tokens = response_dict.get('usage').get('completion_tokens')
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(request_model,
+                        pricing_info, input_tokens,
+                        output_tokens)
+
+                    # Set base span attribues (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                        SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                         SemanticConvetion.GEN_AI_SYSTEM_AI21)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                        SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                        gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                        response_dict.get("id"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                        application_name)
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                        kwargs.get("model", "jamba-1.5-mini"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                        kwargs.get("top_p", 1.0))
+                        request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                        kwargs.get("seed", ""))
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                        server_port)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                        kwargs.get("frequency_penalty", 0.0))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
                         kwargs.get("max_tokens", -1))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                        kwargs.get("presence_penalty", 0.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                        kwargs.get("stop", []))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                        kwargs.get("temperature", 1.0))
+                        kwargs.get("temperature", 0.4))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                        kwargs.get("top_p", 1.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                        response_dict.get("id"))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                        request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                        input_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                        output_tokens)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                        server_address)
+
+                    # Set base span attribues (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                        environment)
+                    span.set_attribute(SERVICE_NAME,
+                        application_name)
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                         False)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                        input_tokens + output_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                        cost)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                        end_time - start_time)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                        version)
                     if trace_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -288,93 +387,54 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
                             },
                         )
 
-                    # Set span attributes when tools is not passed to the function call
-                    if "tools" not in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "jamba-1.5-mini"),
-                            pricing_info, response_dict.get('usage', {}).get('prompt_tokens', None),
-                            response_dict.get('usage', {}).get('completion_tokens', None))
-
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                            response_dict.get('usage', {}).get('prompt_tokens', None))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                            response_dict.get('usage', {}).get('completion_tokens', None))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                            response_dict.get('usage', {}).get('total_tokens', None))
+                    for i in range(kwargs.get('n',1)):
                         span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                            [response_dict.get('choices', [])[0].get('finish_reason', None)])
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                            cost)
-
-                        # Set span attributes for when n = 1 (default)
-                        if "n" not in kwargs or kwargs["n"] == 1:
-                            if trace_content:
-                                span.add_event(
-                                    name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices', [])[0].get("message").get("content"),
-                                    },
-                                )
-
-                        # Set span attributes for when n > 0
-                        else:
-                            i = 0
-                            while i < kwargs["n"] and trace_content is True:
-                                attribute_name = f"gen_ai.content.completion.{i}"
-                                span.add_event(
-                                    name=attribute_name,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                                    },
-                                )
-                                i += 1
-
-                        # Return original response
-                        return response
-
-                    # Set span attributes when tools is passed to the function call
-                    elif "tools" in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "jamba-1.5-mini"),
-                            pricing_info, response_dict.get('usage').get('prompt_tokens'),
-                            response_dict.get('usage').get('completion_tokens'))
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
-                            },
-                        )
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                            response_dict.get('usage').get('prompt_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                            response_dict.get('usage').get('completion_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                            response_dict.get('usage').get('total_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                            cost)
+                            [response_dict.get('choices')[i].get('finish_reason')])
+                        if trace_content:
+                            span.add_event(
+                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                                attributes={
+                                    # pylint: disable=line-too-long
+                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+                                },
+                            )
+                        if kwargs.get('tools'):
+                            span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+                        if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                "text")
+                        elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                "json")
 
                     span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_AI21,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "jamba-1.5-mini")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
+                            request_model=request_model,
+                            server_address=server_address,
+                            server_port=server_port,
+                            response_model=request_model,
+                        )
 
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            end_time - start_time, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            end_time - start_time, attributes
+                        )
                         metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(response_dict.get('usage').get('total_tokens'), attributes)
-                        metrics["genai_completion_tokens"].add(response_dict.get('usage').get('completion_tokens'), attributes)
-                        metrics["genai_prompt_tokens"].add(response_dict.get('usage').get('prompt_tokens'), attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                     # Return original response
@@ -389,13 +449,12 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def async_chat_rag(gen_ai_endpoint, version, environment, application_name,
+def async_chat_rag(version, environment, application_name,
                    tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the AI21 SDK.
@@ -424,180 +483,173 @@ def async_chat_rag(gen_ai_endpoint, version, environment, application_name,
             The response from the original 'chat.completions' method.
         """
 
-        # Check if streaming is enabled for the API call
-        streaming = kwargs.get("stream", False)
-
-        # pylint: disable=no-else-return
-        if streaming:
-            # # Special handling for streaming response to accommodate the nature of data flow
-            # awaited_wrapped = wrapped(*args, **kwargs)
-            # span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
-
-            # return TracedSyncStream(awaited_wrapped, span, kwargs)
+        server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
+        request_model = kwargs.get("model", "jamba-1.5-mini")
 
-            return
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
-        # Handling for non-streaming responses
-        else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                response = await wrapped(*args, **kwargs)
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
+            response = await wrapped(*args, **kwargs)
+            end_time = time.time()
 
-                response_dict = response_as_dict(response)
+            response_dict = response_as_dict(response)
 
-                try:
-                    # Format 'messages' into a single string
-                    message_prompt = kwargs.get("messages", "")
-                    formatted_messages = []
-                    for message in message_prompt:
-                        role = message.role
-                        content = message.content
-
-                        if isinstance(content, list):
-                            content_str = ", ".join(
-                                # pylint: disable=line-too-long
-                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                if "type" in item else f'text: {item["text"]}'
-                                for item in content
-                            )
-                            formatted_messages.append(f"{role}: {content_str}")
-                        else:
-                            formatted_messages.append(f"{role}: {content}")
-                    prompt = "\n".join(formatted_messages)
-
-                    # Set base span attribues
-                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                        SemanticConvetion.GEN_AI_SYSTEM_AI21)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                        SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                        gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                        response_dict.get("id"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                        kwargs.get("model", "jamba-1.5-mini"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                        False)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_SEGMENTS,
-                        kwargs.get("max_segments", -1))
-                    span.set_attribute(SemanticConvetion.GEN_AI_RAG_STRATEGY,
-                        kwargs.get("retrieval_strategy", "segments"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_RAG_SIMILARITY_THRESHOLD,
-                        kwargs.get("retrieval_similarity_threshold", -1))
-                    span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_NEIGHBORS,
-                        kwargs.get("max_neighbors", -1))
-                    span.set_attribute(SemanticConvetion.GEN_AI_RAG_FILE_IDS,
-                        str(kwargs.get("file_ids", "")))
-                    span.set_attribute(SemanticConvetion.GEN_AI_RAG_DOCUMENTS_PATH,
-                        kwargs.get("path", ""))
-
-                    if trace_content:
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
+            try:
+                # Format 'messages' into a single string
+                message_prompt = kwargs.get("messages", "")
+                formatted_messages = []
+                for message in message_prompt:
+                    role = message.role
+                    content = message.content
+
+                    if isinstance(content, list):
+                        content_str = ", ".join(
+                            f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                            if "type" in item else f'text: {item["text"]}'
+                            for item in content
                         )
-                    prompt_tokens = general_tokens(prompt)
-
-
-                    # Set span attributes when tools is not passed to the function call
-                    if "tools" not in kwargs:
-                        prompt_tokens = general_tokens(prompt)
-
-                        # Set span attributes for when n = 1 (default)
-                        if "n" not in kwargs or kwargs["n"] == 1:
-                            completion_tokens = general_tokens(response_dict.get('choices', [])[0].get("content"))
-                            if trace_content:
-                                span.add_event(
-                                    name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices', [])[0].get("content"),
-                                    },
-                                )
+                        formatted_messages.append(f"{role}: {content_str}")
+                    else:
+                        formatted_messages.append(f"{role}: {content}")
+                prompt = "\n".join(formatted_messages)
+
+                input_tokens = general_tokens(prompt)
+
+                # Set base span attribues (OTel Semconv)
+                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                    SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                    SemanticConvetion.GEN_AI_SYSTEM_AI21)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                    request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                    kwargs.get("seed", ""))
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                    server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                    kwargs.get("frequency_penalty", 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                    kwargs.get("max_tokens", -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                    kwargs.get("presence_penalty", 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                    kwargs.get("stop", []))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                    kwargs.get("temperature", 0.4))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                    kwargs.get("top_p", 1.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                    response_dict.get("id"))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                    request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                    input_tokens)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                    server_address)
+
+                # Set base span attribues (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                    environment)
+                span.set_attribute(SERVICE_NAME,
+                    application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                    False)
+                span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                    end_time - start_time)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                    version)
+                span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_SEGMENTS,
+                    kwargs.get("max_segments", -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_RAG_STRATEGY,
+                    kwargs.get("retrieval_strategy", "segments"))
+                span.set_attribute(SemanticConvetion.GEN_AI_RAG_SIMILARITY_THRESHOLD,
+                    kwargs.get("retrieval_similarity_threshold", -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_NEIGHBORS,
+                    kwargs.get("max_neighbors", -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_RAG_FILE_IDS,
+                    str(kwargs.get("file_ids", "")))
+                span.set_attribute(SemanticConvetion.GEN_AI_RAG_DOCUMENTS_PATH,
+                    kwargs.get("path", ""))
+                if trace_content:
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+
+                output_tokens = 0
+                for i in range(kwargs.get('n',1)):
+                    output_tokens += general_tokens(response_dict.get('choices')[i].get('content'))
 
-                        # Set span attributes for when n > 0
-                        else:
-                            i = 0
-                            completion_tokens = 0
-                            while i < kwargs["n"] and trace_content is True:
-                                completion_tokens += general_tokens(response_dict.get('choices')[i].get("message").get("content"))
-                                attribute_name = f"gen_ai.content.completion.{i}"
-                                span.add_event(
-                                    name=attribute_name,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                                    },
-                                )
-                                i += 1
-
-                        # Return original response
-                        return response
-
-                    # Set span attributes when tools is passed to the function call
-                    elif "tools" in kwargs:
-                        completion_tokens = -1
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "jamba-1.5-mini"),
-                            pricing_info, response_dict.get('usage').get('prompt_tokens'),
-                            response_dict.get('usage').get('completion_tokens'))
+                    if trace_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                             attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
+                                # pylint: disable=line-too-long
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('content')),
                             },
                         )
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                            prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                            completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                            prompt_tokens + completion_tokens)
-
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "jamba-1.5-mini"),
-                        pricing_info, prompt_tokens,
-                        completion_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                        cost)
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_AI21,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "jamba-1.5-mini")
-                        }
-
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(prompt_tokens + completion_tokens, attributes)
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
-
-                    # Return original response
-                    return response
-
-                except Exception as e:
-                    handle_exception(span, e)
-                    logger.error("Error in trace creation: %s", e)
-
-                    # Return original response
-                    return response
+                    if kwargs.get('tools'):
+                        span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                            str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+                    if isinstance(response_dict.get('choices')[i].get('content'), str):
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                            "text")
+                    elif response_dict.get('choices')[i].get('content') is not None:
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                            "json")
+
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(request_model,
+                    pricing_info, input_tokens,
+                    output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                    cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                    output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                    input_tokens + output_tokens)
+
+                span.set_status(Status(StatusCode.OK))
+
+                if disable_metrics is False:
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
+
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens + output_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics["genai_server_ttft"].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics["genai_requests"].add(1, attributes)
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                    metrics["genai_cost"].record(cost, attributes)
+
+                # Return original response
+                return response
+
+            except Exception as e:
+                handle_exception(span, e)
+                logger.error("Error in trace creation: %s", e)
+
+                # Return original response
+                return response
 
     return wrapper
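
The streaming wrapper above records a wall-clock timestamp for every chunk and hands the list to calculate_ttft and calculate_tbt, two of the helpers this release adds to openlit/__helpers.py (+83 lines, not shown in this excerpt). A minimal sketch of what such helpers could look like, inferred only from the call sites above — the names and signatures match the diff, but the bodies here are assumptions, not the shipped implementation:

    # Hypothetical reconstruction of the timing helpers; only their
    # call sites are visible in this diff.
    def calculate_ttft(timestamps, start_time):
        """Time to first token: delay from request start to the first chunk."""
        if timestamps:
            return timestamps[0] - start_time
        return 0

    def calculate_tbt(timestamps):
        """Average gap between consecutive streamed chunks."""
        if len(timestamps) > 1:
            gaps = [t2 - t1 for t1, t2 in zip(timestamps, timestamps[1:])]
            return sum(gaps) / len(gaps)
        return 0

This matches the guard conditions in __anext__ and the StopAsyncIteration handler: TTFT is computed once, when the first timestamp arrives, and TBT only when at least two chunks were seen.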
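The inline metric-attribute dictionaries are likewise replaced by create_metrics_attributes from the same helper module. Judging purely from the keyword arguments passed at the call sites, it plausibly builds a flat attribute mapping along these lines — the attribute keys below are assumed from the OTel GenAI semantic conventions this release migrates to, and the real helper may differ:

    # Hypothetical sketch; signature taken from the call sites, keys assumed.
    from opentelemetry.sdk.resources import (
        SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT,
    )

    def create_metrics_attributes(service_name, deployment_environment,
                                  operation, system, request_model,
                                  server_address, server_port, response_model):
        return {
            TELEMETRY_SDK_NAME: "openlit",
            SERVICE_NAME: service_name,
            DEPLOYMENT_ENVIRONMENT: deployment_environment,
            "gen_ai.operation.name": operation,
            "gen_ai.system": system,
            "gen_ai.request.model": request_model,
            "server.address": server_address,
            "server.port": server_port,
            "gen_ai.response.model": response_model,
        }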