openlit 1.33.8__py3-none-any.whl → 1.33.10__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Files changed (92)
  1. openlit/__helpers.py +88 -0
  2. openlit/__init__.py +4 -3
  3. openlit/instrumentation/ag2/ag2.py +5 -5
  4. openlit/instrumentation/ai21/__init__.py +4 -4
  5. openlit/instrumentation/ai21/ai21.py +370 -319
  6. openlit/instrumentation/ai21/async_ai21.py +371 -319
  7. openlit/instrumentation/anthropic/__init__.py +4 -4
  8. openlit/instrumentation/anthropic/anthropic.py +321 -189
  9. openlit/instrumentation/anthropic/async_anthropic.py +323 -190
  10. openlit/instrumentation/assemblyai/__init__.py +1 -1
  11. openlit/instrumentation/assemblyai/assemblyai.py +59 -43
  12. openlit/instrumentation/astra/astra.py +9 -9
  13. openlit/instrumentation/astra/async_astra.py +9 -9
  14. openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
  15. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
  16. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
  17. openlit/instrumentation/bedrock/__init__.py +1 -1
  18. openlit/instrumentation/bedrock/bedrock.py +115 -58
  19. openlit/instrumentation/chroma/chroma.py +9 -9
  20. openlit/instrumentation/cohere/__init__.py +33 -10
  21. openlit/instrumentation/cohere/async_cohere.py +610 -0
  22. openlit/instrumentation/cohere/cohere.py +410 -219
  23. openlit/instrumentation/controlflow/controlflow.py +5 -5
  24. openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
  25. openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
  26. openlit/instrumentation/crewai/crewai.py +6 -4
  27. openlit/instrumentation/dynamiq/dynamiq.py +5 -5
  28. openlit/instrumentation/elevenlabs/async_elevenlabs.py +71 -46
  29. openlit/instrumentation/elevenlabs/elevenlabs.py +71 -51
  30. openlit/instrumentation/embedchain/embedchain.py +9 -9
  31. openlit/instrumentation/firecrawl/firecrawl.py +5 -5
  32. openlit/instrumentation/google_ai_studio/__init__.py +9 -9
  33. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
  34. openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
  35. openlit/instrumentation/gpt4all/__init__.py +2 -2
  36. openlit/instrumentation/gpt4all/gpt4all.py +345 -220
  37. openlit/instrumentation/gpu/__init__.py +5 -5
  38. openlit/instrumentation/groq/__init__.py +2 -2
  39. openlit/instrumentation/groq/async_groq.py +356 -240
  40. openlit/instrumentation/groq/groq.py +356 -240
  41. openlit/instrumentation/haystack/haystack.py +5 -5
  42. openlit/instrumentation/julep/async_julep.py +5 -5
  43. openlit/instrumentation/julep/julep.py +5 -5
  44. openlit/instrumentation/langchain/__init__.py +13 -7
  45. openlit/instrumentation/langchain/async_langchain.py +384 -0
  46. openlit/instrumentation/langchain/langchain.py +105 -492
  47. openlit/instrumentation/letta/letta.py +11 -9
  48. openlit/instrumentation/litellm/__init__.py +4 -5
  49. openlit/instrumentation/litellm/async_litellm.py +318 -247
  50. openlit/instrumentation/litellm/litellm.py +314 -243
  51. openlit/instrumentation/llamaindex/llamaindex.py +5 -5
  52. openlit/instrumentation/mem0/mem0.py +5 -5
  53. openlit/instrumentation/milvus/milvus.py +9 -9
  54. openlit/instrumentation/mistral/__init__.py +6 -6
  55. openlit/instrumentation/mistral/async_mistral.py +423 -250
  56. openlit/instrumentation/mistral/mistral.py +420 -246
  57. openlit/instrumentation/multion/async_multion.py +6 -4
  58. openlit/instrumentation/multion/multion.py +6 -4
  59. openlit/instrumentation/ollama/__init__.py +8 -30
  60. openlit/instrumentation/ollama/async_ollama.py +385 -417
  61. openlit/instrumentation/ollama/ollama.py +384 -417
  62. openlit/instrumentation/openai/__init__.py +11 -230
  63. openlit/instrumentation/openai/async_openai.py +433 -410
  64. openlit/instrumentation/openai/openai.py +414 -394
  65. openlit/instrumentation/phidata/phidata.py +6 -4
  66. openlit/instrumentation/pinecone/pinecone.py +9 -9
  67. openlit/instrumentation/premai/__init__.py +2 -2
  68. openlit/instrumentation/premai/premai.py +262 -213
  69. openlit/instrumentation/qdrant/async_qdrant.py +9 -9
  70. openlit/instrumentation/qdrant/qdrant.py +9 -9
  71. openlit/instrumentation/reka/__init__.py +2 -2
  72. openlit/instrumentation/reka/async_reka.py +90 -52
  73. openlit/instrumentation/reka/reka.py +90 -52
  74. openlit/instrumentation/together/__init__.py +4 -4
  75. openlit/instrumentation/together/async_together.py +278 -236
  76. openlit/instrumentation/together/together.py +278 -236
  77. openlit/instrumentation/transformers/__init__.py +1 -1
  78. openlit/instrumentation/transformers/transformers.py +76 -45
  79. openlit/instrumentation/vertexai/__init__.py +14 -64
  80. openlit/instrumentation/vertexai/async_vertexai.py +330 -987
  81. openlit/instrumentation/vertexai/vertexai.py +330 -987
  82. openlit/instrumentation/vllm/__init__.py +1 -1
  83. openlit/instrumentation/vllm/vllm.py +66 -36
  84. openlit/otel/metrics.py +98 -7
  85. openlit/semcov/__init__.py +113 -80
  86. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
  87. openlit-1.33.10.dist-info/RECORD +122 -0
  88. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/WHEEL +1 -1
  89. openlit/instrumentation/openai/async_azure_openai.py +0 -900
  90. openlit/instrumentation/openai/azure_openai.py +0 -898
  91. openlit-1.33.8.dist-info/RECORD +0 -122
  92. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
openlit/instrumentation/google_ai_studio/async_google_ai_studio.py
@@ -1,21 +1,24 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment, protected-access
 """
 Module for monitoring Google AI Studio API calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
-    handle_exception,
     get_chat_model_cost,
+    handle_exception,
+    response_as_dict,
+    create_metrics_attributes,
+    set_server_address_and_port
 )
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def async_generate(gen_ai_endpoint, version, environment, application_name,
+def async_generate(version, environment, application_name,
                    tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat to collect metrics.
@@ -35,229 +38,190 @@ def async_generate(gen_ai_endpoint, version, environment, application_name,
 
     async def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'chat' API call to add telemetry.
-
+        Wraps the 'chat.completions' API call to add telemetry.
+
         This collects metrics such as execution time, cost, and token usage, and handles errors
         gracefully, adding details to the trace for observability.
 
         Args:
-            wrapped: The original 'chat' method to be wrapped.
+            wrapped: The original 'chat.completions' method to be wrapped.
             instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'chat' method.
-            kwargs: Keyword arguments for the 'chat' method.
+            args: Positional arguments for the 'chat.completions' method.
+            kwargs: Keyword arguments for the 'chat.completions' method.
 
         Returns:
-            The response from the original 'chat' method.
+            The response from the original 'chat.completions' method.
         """
-        # pylint: disable=no-else-return
-        if kwargs.get('stream', False) is True:
-            # Special handling for streaming response to accommodate the nature of data flow
-            async def stream_generator():
-                with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                    # Placeholder for aggregating streaming response
-                    llmresponse = ""
-
-                    # Loop through streaming events capturing relevant details
-                    async for chunk in await wrapped(*args, **kwargs):
-                        # Collect message IDs and aggregated response from events
-                        content = chunk.text
-                        if content:
-                            llmresponse += content
-
-                        input_tokens = chunk.usage_metadata.prompt_token_count
-                        output_tokens = chunk.usage_metadata.candidates_token_count
-                        yield chunk
-
-                    # Handling exception ensure observability without disrupting operation
-                    try:
-                        prompt = ""
-                        for arg in args:
-                            if isinstance(arg, str):
-                                prompt = f"{prompt}{arg}\n"
-                            elif isinstance(arg, list):
-                                for subarg in arg:
-                                    prompt = f"{prompt}{subarg}\n"
-                        if hasattr(instance, "_model_id"):
-                            model = instance._model_id
-                        if hasattr(instance, "_model_name"):
-                            model = instance._model_name.replace("publishers/google/models/", "")
-                        if model.startswith("models/"):
-                            model = model[len("models/"):]
-
-                        total_tokens = input_tokens + output_tokens
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(model,
-                                                   pricing_info, input_tokens,
-                                                   output_tokens)
-
-                        # Set Span attributes
-                        span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                        span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                           SemanticConvetion.GEN_AI_SYSTEM_GOOGLE_AI_STUDIO)
-                        span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                           SemanticConvetion.GEN_AI_TYPE_CHAT)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                           gen_ai_endpoint)
-                        span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                           environment)
-                        span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                           application_name)
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                           model)
-                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                           True)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                           input_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+
+        server_address, server_port = set_server_address_and_port(instance, "generativelanguage.googleapis.com", 443)
+        request_model = kwargs.get("model", "gemini-2.0-flash")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+            start_time = time.time()
+            response = await wrapped(*args, **kwargs)
+            end_time = time.time()
+
+            response_dict = response_as_dict(response)
+
+            try:
+                # Format 'messages' into a single string
+                message_prompt = kwargs.get("contents", [])
+                formatted_messages = []
+
+                for content in message_prompt:
+                    role = content.role
+                    parts = content.parts
+                    content_str = []
+
+                    for part in parts:
+                        # Collect relevant fields and handle each type of data that Part could contain
+                        if part.text:
+                            content_str.append(f"text: {part.text}")
+                        if part.video_metadata:
+                            content_str.append(f"video_metadata: {part.video_metadata}")
+                        if part.thought:
+                            content_str.append(f"thought: {part.thought}")
+                        if part.code_execution_result:
+                            content_str.append(f"code_execution_result: {part.code_execution_result}")
+                        if part.executable_code:
+                            content_str.append(f"executable_code: {part.executable_code}")
+                        if part.file_data:
+                            content_str.append(f"file_data: {part.file_data}")
+                        if part.function_call:
+                            content_str.append(f"function_call: {part.function_call}")
+                        if part.function_response:
+                            content_str.append(f"function_response: {part.function_response}")
+                        if part.inline_data:
+                            content_str.append(f"inline_data: {part.inline_data}")
+
+                    formatted_messages.append(f"{role}: {', '.join(content_str)}")
+
+                prompt = "\n".join(formatted_messages)
+
+                input_tokens = response_dict.get('usage_metadata').get('prompt_token_count')
+                output_tokens = response_dict.get('usage_metadata').get('candidates_token_count')
+
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(request_model,
+                                           pricing_info, input_tokens,
                                            output_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                           total_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                           cost)
-                        if trace_content:
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                                },
-                            )
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                                },
-                            )
-
-                        span.set_status(Status(StatusCode.OK))
-
-                        if disable_metrics is False:
-                            attributes = {
-                                TELEMETRY_SDK_NAME:
-                                    "openlit",
-                                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                    application_name,
-                                SemanticConvetion.GEN_AI_SYSTEM:
-                                    SemanticConvetion.GEN_AI_SYSTEM_GOOGLE_AI_STUDIO,
-                                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                    environment,
-                                SemanticConvetion.GEN_AI_TYPE:
-                                    SemanticConvetion.GEN_AI_TYPE_CHAT,
-                                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                    model
-                            }
-
-                            metrics["genai_requests"].add(1, attributes)
-                            metrics["genai_total_tokens"].add(
-                                total_tokens, attributes
-                            )
-                            metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                            metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                            metrics["genai_cost"].record(cost, attributes)
-
-                    except Exception as e:
-                        handle_exception(span, e)
-                        logger.error("Error in trace creation: %s", e)
-
-            return stream_generator()
-        else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-                response = await wrapped(*args, **kwargs)
-
-                try:
-                    prompt = ""
-                    for arg in args:
-                        if isinstance(arg, str):
-                            prompt = f"{prompt}{arg}\n"
-                        elif isinstance(arg, list):
-                            for subarg in arg:
-                                prompt = f"{prompt}{subarg}\n"
-                    if hasattr(instance, "_model_id"):
-                        model = instance._model_id
-                    if hasattr(instance, "_model_name"):
-                        model = instance._model_name.replace("publishers/google/models/", "")
-                    if model.startswith("models/"):
-                        model = model[len("models/"):]
-
-                    # Set base span attribues
-                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                       SemanticConvetion.GEN_AI_SYSTEM_GOOGLE_AI_STUDIO)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                       environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                       application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       model)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                       False)
-
-                    if trace_content:
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.text,
-                            },
-                        )
-
-                    prompt_tokens = response.usage_metadata.prompt_token_count
-                    completion_tokens = response.usage_metadata.candidates_token_count
-                    total_tokens = response.usage_metadata.total_token_count
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(model,
-                                               pricing_info, prompt_tokens, completion_tokens)
-
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                       prompt_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                       completion_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       total_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                       cost)
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_GOOGLE_AI_STUDIO,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                model
-                        }
-
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(total_tokens, attributes)
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
-
-                    # Return original response
-                    return response
-
-                except Exception as e:
-                    handle_exception(span, e)
-                    logger.error("Error in trace creation: %s", e)
-
-                    # Return original response
-                    return response
+
+                # Set base span attribues (OTel Semconv)
+                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                   SemanticConvetion.GEN_AI_SYSTEM_GEMINI)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
+
+                inference_config = kwargs.get('config', {})
+
+                # List of attributes and their config keys
+                attributes = [
+                    (SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
+                    (SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, 'max_tokens'),
+                    (SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
+                    (SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
+                    (SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
+                    (SemanticConvetion.GEN_AI_REQUEST_TOP_P, 'top_p'),
+                    (SemanticConvetion.GEN_AI_REQUEST_TOP_K, 'top_k'),
+                ]
+
+                # Set each attribute if the corresponding value exists and is not None
+                for attribute, key in attributes:
+                    # Use getattr to get the attribute value from the object
+                    value = getattr(inference_config, key, None)
+                    if value is not None:
+                        span.set_attribute(attribute, value)
+
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   response_dict.get('model_version'))
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                   input_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                   output_tokens)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                   [str(response_dict.get('candidates')[0].get('finish_reason'))])
+
+                # Set base span attribues (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                   environment)
+                span.set_attribute(SERVICE_NAME,
+                                   application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                   False)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                   input_tokens + output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                   cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                   end_time - start_time)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                   version)
+                if trace_content:
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.text,
+                        },
+                    )
+
+                if isinstance(response_dict.get('text'), str):
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                       "text")
+                elif response_dict.get('text') is not None:
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                       "json")
+
+                span.set_status(Status(StatusCode.OK))
+
+                if disable_metrics is False:
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_GEMINI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=response_dict.get('model_version'),
+                    )
+
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens + output_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics["genai_server_ttft"].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics["genai_requests"].add(1, attributes)
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                    metrics["genai_cost"].record(cost, attributes)
+
+                # Return original response
+                return response
+
+            except Exception as e:
+                handle_exception(span, e)
+                logger.error("Error in trace creation: %s", e)
+
+                # Return original response
+                return response
 
     return wrapper
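
For readers unfamiliar with the pattern, async_generate is a wrapper factory: it does not patch anything itself, but returns the async wrapper(wrapped, instance, args, kwargs) shown above, which an instrumentor attaches to the client's generate method. The sketch below shows how such a factory is typically registered with wrapt (the patching library openlit's instrumentors rely on); the target module and method strings are illustrative assumptions, not taken from this diff, and the real registration lives in openlit/instrumentation/google_ai_studio/__init__.py, which also changed in this release (+9 -9).

# Hedged sketch of how a factory like async_generate gets attached; the
# module/method paths below are assumptions for illustration only.
from wrapt import wrap_function_wrapper

from openlit.instrumentation.google_ai_studio.async_google_ai_studio import (
    async_generate,
)

def patch_async_generate(version, environment, application_name, tracer,
                         pricing_info, trace_content, metrics, disable_metrics):
    # async_generate(...) returns wrapper(wrapped, instance, args, kwargs);
    # wrapt invokes that wrapper in place of the original method and passes
    # the original method through as `wrapped`.
    wrap_function_wrapper(
        "google.genai.models",           # assumed module hosting the client
        "AsyncModels.generate_content",  # assumed async generate entry point
        async_generate(version, environment, application_name, tracer,
                       pricing_info, trace_content, metrics, disable_metrics),
    )

Once registered, every call into the patched method flows through wrapper, which times the request, derives token counts and cost from the response, and emits the span attributes and metrics shown in the diff.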