openlit 1.34.20__py3-none-any.whl → 1.34.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
openlit/instrumentation/vertexai/__init__.py

@@ -1,4 +1,3 @@
-# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
 """Initializer of Auto Instrumentation of VertexAI Functions"""
 
 from typing import Collection
@@ -6,92 +5,88 @@ import importlib.metadata
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from wrapt import wrap_function_wrapper
 
-from openlit.instrumentation.vertexai.vertexai import (
-    send_message
-)
-from openlit.instrumentation.vertexai.async_vertexai import (
-    async_send_message
-)
-
+from openlit.instrumentation.vertexai.vertexai import send_message
+from openlit.instrumentation.vertexai.async_vertexai import async_send_message
 
 _instruments = ("google-cloud-aiplatform >= 1.38.1",)
 
 class VertexAIInstrumentor(BaseInstrumentor):
     """
-    An instrumentor for VertexAI's client library.
+    An instrumentor for VertexAI client library.
     """
 
     def instrumentation_dependencies(self) -> Collection[str]:
         return _instruments
 
     def _instrument(self, **kwargs):
-        application_name = kwargs.get("application_name", "default")
+        version = importlib.metadata.version("google-cloud-aiplatform")
         environment = kwargs.get("environment", "default")
+        application_name = kwargs.get("application_name", "default")
         tracer = kwargs.get("tracer")
-        metrics = kwargs.get("metrics_dict")
         pricing_info = kwargs.get("pricing_info", {})
        capture_message_content = kwargs.get("capture_message_content", False)
+        metrics = kwargs.get("metrics_dict")
         disable_metrics = kwargs.get("disable_metrics")
-        version = importlib.metadata.version("google-cloud-aiplatform")
 
-        #sync
+        # sync generative models
         wrap_function_wrapper(
             "vertexai.generative_models",
             "GenerativeModel.generate_content",
             send_message(version, environment, application_name,
-                         tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
         wrap_function_wrapper(
             "vertexai.generative_models",
             "ChatSession.send_message",
             send_message(version, environment, application_name,
-                         tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
+        # sync language models
         wrap_function_wrapper(
             "vertexai.language_models",
             "ChatSession.send_message",
             send_message(version, environment, application_name,
-                         tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
         wrap_function_wrapper(
             "vertexai.language_models",
             "ChatSession.send_message_streaming",
             send_message(version, environment, application_name,
-                         tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
-        #async
+        # async generative models
         wrap_function_wrapper(
             "vertexai.generative_models",
             "GenerativeModel.generate_content_async",
             async_send_message(version, environment, application_name,
-                               tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
         wrap_function_wrapper(
             "vertexai.generative_models",
             "ChatSession.send_message_async",
             async_send_message(version, environment, application_name,
-                               tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
+        # async language models
         wrap_function_wrapper(
             "vertexai.language_models",
             "ChatSession.send_message_async",
             async_send_message(version, environment, application_name,
-                               tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
         wrap_function_wrapper(
             "vertexai.language_models",
             "ChatSession.send_message_streaming_async",
             async_send_message(version, environment, application_name,
-                               tracer, pricing_info, capture_message_content, metrics, disable_metrics),
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
     def _uninstrument(self, **kwargs):
-        # Proper uninstrumentation logic to revert patched methods
         pass
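
For context, the instrumentor above hinges on wrapt's factory pattern: send_message(...) runs once at instrumentation time, closes over the configuration, and returns the wrapper that wrap_function_wrapper patches over the target method. Below is a minimal, self-contained sketch of that pattern; the timing-only wrapper body and the placeholder argument values are illustrative assumptions, not openlit's actual telemetry logic.

    import time
    from wrapt import wrap_function_wrapper

    def send_message(version, environment, application_name, tracer,
                     pricing_info, capture_message_content, metrics, disable_metrics):
        # Factory: capture configuration once, return the wrapt-style wrapper.
        def wrapper(wrapped, instance, args, kwargs):
            start = time.time()
            response = wrapped(*args, **kwargs)  # call the original method
            print(f"[{application_name}] call took {time.time() - start:.3f}s")
            return response
        return wrapper

    # Patch the target method in place, exactly as _instrument() does above.
    wrap_function_wrapper(
        "vertexai.generative_models",
        "GenerativeModel.generate_content",
        send_message("1.0.0", "default", "demo-app",
                     None, {}, False, None, True),
    )
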
openlit/instrumentation/vertexai/async_vertexai.py

@@ -4,14 +4,15 @@ Module for monitoring VertexAI API calls.
 
 import logging
 import time
-from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
-    get_chat_model_cost,
     handle_exception,
-    calculate_ttft,
-    calculate_tbt,
-    create_metrics_attributes,
+)
+from openlit.instrumentation.vertexai.utils import (
+    process_chunk,
+    process_chat_response,
+    process_streaming_chat_response,
+    extract_vertexai_details,
 )
 from openlit.semcov import SemanticConvention
 
@@ -21,35 +22,19 @@ logger = logging.getLogger(__name__)
 def async_send_message(version, environment, application_name, tracer,
                        pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for messages to collect metrics.
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the VertexAI API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of VertexAI usage.
-        capture_message_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the chat method to add telemetry.
+    Generates a telemetry wrapper for VertexAI AsyncMessages calls.
     """
 
     class TracedAsyncStream:
         """
-        Wrapper for streaming responses to collect metrics and trace data.
-        Wraps the response to collect message IDs and aggregated response.
-
-        This class implements the '__aiter__' and '__anext__' methods that
-        handle asynchronous streaming responses.
-
-        This class also implements '__aenter__' and '__aexit__' methods that
-        handle asynchronous context management protocol.
+        Wrapper for async streaming responses to collect telemetry.
         """
+
         def __init__(
             self,
             wrapped,
             span,
+            span_name,
             kwargs,
             server_address,
             server_port,
@@ -58,12 +43,10 @@ def async_send_message(version, environment, application_name, tracer,
         ):
             self.__wrapped__ = wrapped
             self._span = span
-            # Placeholder for aggregating streaming response
+            self._span_name = span_name
             self._llmresponse = ""
-            self._input_tokens = ""
-            self._output_tokens = ""
-
-            self._args = args
+            self._input_tokens = 0
+            self._output_tokens = 0
             self._kwargs = kwargs
             self._start_time = time.time()
             self._end_time = None
@@ -73,6 +56,7 @@ def async_send_message(version, environment, application_name, tracer,
             self._server_address = server_address
             self._server_port = server_port
             self._request_model = request_model
+            self._args = args
 
         async def __aenter__(self):
             await self.__wrapped__.__aenter__()
@@ -91,369 +75,67 @@ def async_send_message(version, environment, application_name, tracer,
         async def __anext__(self):
             try:
                 chunk = await self.__wrapped__.__anext__()
-                end_time = time.time()
-                # Record the timestamp for the current chunk
-                self._timestamps.append(end_time)
-
-                if len(self._timestamps) == 1:
-                    # Calculate time to first chunk
-                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
-
-                self._llmresponse += str(chunk.text)
-                self._input_tokens = chunk.usage_metadata.prompt_token_count
-                self._output_tokens = chunk.usage_metadata.candidates_token_count
-
+                process_chunk(self, chunk)
                 return chunk
             except StopAsyncIteration:
-                # Handling exception ensure observability without disrupting operation
                 try:
-                    self._end_time = time.time()
-                    if len(self._timestamps) > 1:
-                        self._tbt = calculate_tbt(self._timestamps)
-
-                    # Format 'messages' into a single string
-                    message_prompt = self._kwargs.get("messages", "")
-                    formatted_messages = []
-                    for message in message_prompt:
-                        role = message["role"]
-                        content = message["content"]
-
-                        if isinstance(content, list):
-                            content_str_list = []
-                            for item in content:
-                                if item["type"] == "text":
-                                    content_str_list.append(f'text: {item["text"]}')
-                                elif (item["type"] == "image_url" and
-                                      not item["image_url"]["url"].startswith("data:")):
-                                    content_str_list.append(f'image_url: {item["image_url"]["url"]}')
-                            content_str = ", ".join(content_str_list)
-                            formatted_messages.append(f"{role}: {content_str}")
-                        else:
-                            formatted_messages.append(f"{role}: {content}")
-                    prompt = "\n".join(formatted_messages) or str(self._args[0][0])
-
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(self._request_model,
-                                               pricing_info, self._input_tokens,
-                                               self._output_tokens)
-
-                    # Set Span attributes (OTel Semconv)
-                    self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    self._span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                                             SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                                             SemanticConvention.GEN_AI_SYSTEM_VERTEXAI)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                                             self._request_model)
-                    self._span.set_attribute(SemanticConvention.SERVER_PORT,
-                                             self._server_port)
-
-                    inference_config = self._kwargs.get('generation_config', {})
-
-                    # List of attributes and their config keys
-                    attributes = [
-                        (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
-                        (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'max_output_tokens'),
-                        (SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
-                        (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
-                        (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
-                        (SemanticConvention.GEN_AI_REQUEST_TOP_P, 'top_p'),
-                        (SemanticConvention.GEN_AI_REQUEST_TOP_K, 'top_k'),
-                    ]
-
-                    # Set each attribute if the corresponding value exists and is not None
-                    for attribute, key in attributes:
-                        # Use the `get` method to safely access keys in the dictionary
-                        value = inference_config.get(key)
-                        if value is not None:
-                            self._span.set_attribute(attribute, value)
-
-                    self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                                             self._request_model)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                                             self._input_tokens)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                             self._output_tokens)
-                    self._span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                                             self._server_address)
-                    if isinstance(self._llmresponse, str):
-                        self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                                                 "text")
-                    else:
-                        self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                                                 "json")
-
-                    # Set Span attributes (Extra)
-                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                             environment)
-                    self._span.set_attribute(SERVICE_NAME,
-                                             application_name)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                                             True)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                                             self._input_tokens + self._output_tokens)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                                             cost)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
-                                             self._tbt)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                                             self._ttft)
-                    self._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                                             version)
-                    if capture_message_content:
-                        self._span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        self._span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
-                            },
-                        )
-                    self._span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = create_metrics_attributes(
-                            service_name=application_name,
-                            deployment_environment=environment,
-                            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                            system=SemanticConvention.GEN_AI_SYSTEM_VERTEXAI,
-                            request_model=self._request_model,
-                            server_address=self._server_address,
-                            server_port=self._server_port,
-                            response_model=self._request_model,
-                        )
-
-                        metrics["genai_client_usage_tokens"].record(
-                            self._input_tokens + self._output_tokens, attributes
-                        )
-                        metrics["genai_client_operation_duration"].record(
-                            self._end_time - self._start_time, attributes
-                        )
-                        metrics["genai_server_tbt"].record(
-                            self._tbt, attributes
-                        )
-                        metrics["genai_server_ttft"].record(
-                            self._ttft, attributes
+                    with self._span:
+                        process_streaming_chat_response(
+                            self,
+                            pricing_info=pricing_info,
+                            environment=environment,
+                            application_name=application_name,
+                            metrics=metrics,
+                            capture_message_content=capture_message_content,
+                            disable_metrics=disable_metrics,
+                            version=version
                         )
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
-
                 except Exception as e:
                     handle_exception(self._span, e)
-                    logger.error("Error in trace creation: %s", e)
-                finally:
-                    self._span.end()
                 raise
 
     async def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'messages' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'messages' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'messages' method.
-            kwargs: Keyword arguments for the 'messages' method.
-
-        Returns:
-            The response from the original 'messages' method.
+        Wraps the VertexAI async API call to add telemetry.
         """
 
-        # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
-
-        try:
-            location = instance._model._location
-            request_model = "/".join(instance._model._model_name.split("/")[3:])
-        except:
-            location = instance._location
-            request_model = "/".join(instance._model_name.split("/")[3:])
-
-        server_address, server_port = location + '-aiplatform.googleapis.com', 443
+        server_address, server_port, request_model = extract_vertexai_details(instance)
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
-        # pylint: disable=no-else-return
         if streaming:
-            # Special handling for streaming response to accommodate the nature of data flow
            awaited_wrapped = await wrapped(*args, **kwargs)
             span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-            return TracedAsyncStream(awaited_wrapped, span, kwargs, server_address, server_port, request_model, args)
+            return TracedAsyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port, request_model, args)
 
-        # Handling for non-streaming responses
         else:
             with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
                 start_time = time.time()
                 response = await wrapped(*args, **kwargs)
-                end_time = time.time()
 
                 try:
-                    # Format 'messages' into a single string
-                    message_prompt = kwargs.get("contents", [])
-                    formatted_messages = []
-
-                    for content in message_prompt:
-                        role = content.role
-                        parts = content.parts
-                        content_str = []
-
-                        for part in parts:
-                            # Collect relevant fields and handle each type of data that Part could contain
-                            if part.text:
-                                content_str.append(f"text: {part.text}")
-                            if part.video_metadata:
-                                content_str.append(f"video_metadata: {part.video_metadata}")
-                            if part.thought:
-                                content_str.append(f"thought: {part.thought}")
-                            if part.code_execution_result:
-                                content_str.append(f"code_execution_result: {part.code_execution_result}")
-                            if part.executable_code:
-                                content_str.append(f"executable_code: {part.executable_code}")
-                            if part.file_data:
-                                content_str.append(f"file_data: {part.file_data}")
-                            if part.function_call:
-                                content_str.append(f"function_call: {part.function_call}")
-                            if part.function_response:
-                                content_str.append(f"function_response: {part.function_response}")
-                            if part.inline_data:
-                                content_str.append(f"inline_data: {part.inline_data}")
-
-                        formatted_messages.append(f"{role}: {', '.join(content_str)}")
-
-                    prompt = "\n".join(formatted_messages) or str(args[0][0])
-
-                    input_tokens = response.usage_metadata.prompt_token_count
-                    output_tokens = response.usage_metadata.candidates_token_count
-
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(request_model,
-                                               pricing_info, input_tokens,
-                                               output_tokens)
-
-                    # Set base span attribues (OTel Semconv)
-                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                                       SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                                       SemanticConvention.GEN_AI_SYSTEM_VERTEXAI)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                                       request_model)
-                    span.set_attribute(SemanticConvention.SERVER_PORT,
-                                       server_port)
-
-                    inference_config = kwargs.get('generation_config', {})
-
-                    # List of attributes and their config keys
-                    attributes = [
-                        (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequency_penalty'),
-                        (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'max_output_tokens'),
-                        (SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presence_penalty'),
-                        (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stop_sequences'),
-                        (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
-                        (SemanticConvention.GEN_AI_REQUEST_TOP_P, 'top_p'),
-                        (SemanticConvention.GEN_AI_REQUEST_TOP_K, 'top_k'),
-                    ]
-
-                    # Set each attribute if the corresponding value exists and is not None
-                    for attribute, key in attributes:
-                        # Use the `get` method to safely access keys in the dictionary
-                        value = inference_config.get(key)
-                        if value is not None:
-                            span.set_attribute(attribute, value)
-
-                    span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                                       request_model)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                                       input_tokens)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                       output_tokens)
-                    span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                                       server_address)
-                    # span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
-                    #                    [str(response.candidates[0].finish_reason)])
-
-                    # Set base span attribues (Extras)
-                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                       environment)
-                    span.set_attribute(SERVICE_NAME,
-                                       application_name)
-                    span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                                       False)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       input_tokens + output_tokens)
-                    span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                                       cost)
-                    span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                                       end_time - start_time)
-                    span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                                       version)
-                    if capture_message_content:
-                        span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        span.add_event(
-                            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvention.GEN_AI_CONTENT_COMPLETION: response.text,
-                            },
-                        )
-
-                    if isinstance(response.text, str):
-                        span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                                           "text")
-                    elif response.text is not None:
-                        span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                                           "json")
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = create_metrics_attributes(
-                            service_name=application_name,
-                            deployment_environment=environment,
-                            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                            system=SemanticConvention.GEN_AI_SYSTEM_VERTEXAI,
-                            request_model=request_model,
-                            server_address=server_address,
-                            server_port=server_port,
-                            response_model=request_model,
-                        )
-
-                        metrics["genai_client_usage_tokens"].record(
-                            input_tokens + output_tokens, attributes
-                        )
-                        metrics["genai_client_operation_duration"].record(
-                            end_time - start_time, attributes
-                        )
-                        metrics["genai_server_ttft"].record(
-                            end_time - start_time, attributes
-                        )
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
-
-                    # Return original response
-                    return response
+                    response = process_chat_response(
+                        response=response,
+                        request_model=request_model,
+                        pricing_info=pricing_info,
+                        server_port=server_port,
+                        server_address=server_address,
+                        environment=environment,
+                        application_name=application_name,
+                        metrics=metrics,
+                        start_time=start_time,
+                        span=span,
+                        capture_message_content=capture_message_content,
+                        disable_metrics=disable_metrics,
+                        version=version,
+                        **kwargs
+                    )
 
                 except Exception as e:
                     handle_exception(span, e)
-                    logger.error("Error in trace creation: %s", e)
 
-                # Return original response
-                return response
+                return response
 
     return wrapper
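
The slimmed-down TracedAsyncStream keeps the same proxy shape as before the refactor: delegate __anext__ to the wrapped stream, hand each chunk to process_chunk, and finalize the span only when StopAsyncIteration fires. A runnable sketch of that shape follows, with a plain chunk counter standing in for openlit's telemetry helpers; class and function names here are illustrative only.

    import asyncio

    class StreamProxy:
        # Illustrative stand-in for TracedAsyncStream: same __aiter__/__anext__
        # delegation, with a chunk counter in place of telemetry aggregation.
        def __init__(self, wrapped):
            self.__wrapped__ = wrapped
            self._chunks = 0

        def __aiter__(self):
            return self

        async def __anext__(self):
            try:
                chunk = await self.__wrapped__.__anext__()
                self._chunks += 1  # per-chunk work (cf. process_chunk)
                return chunk
            except StopAsyncIteration:
                # finalize once the stream is drained
                # (cf. process_streaming_chat_response)
                print(f"stream finished after {self._chunks} chunks")
                raise

    async def main():
        async def fake_stream():
            for token in ("Hello", ",", " world"):
                yield token

        async for token in StreamProxy(fake_stream()):
            print(token)

    asyncio.run(main())
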