openlit 1.33.17__py3-none-any.whl → 1.33.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,225 @@
1
+ """
2
+ Azure AI Inference OpenTelemetry instrumentation utility functions
3
+ """
4
+ import time
5
+
6
+ from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
7
+ from opentelemetry.trace import Status, StatusCode
8
+
9
+ from openlit.__helpers import (
10
+ calculate_ttft,
11
+ response_as_dict,
12
+ calculate_tbt,
13
+ extract_and_format_input,
14
+ get_chat_model_cost,
15
+ create_metrics_attributes,
16
+ otel_event,
17
+ concatenate_all_contents
18
+ )
19
+ from openlit.semcov import SemanticConvetion
20
+
21
def process_chunk(self, chunk):
    """
    Process a chunk of response data and update state.

    Records the arrival time of the chunk, computes the time to first chunk
    when this is the first one seen, and folds the chunk's text, finish
    reason and usage details into the state held on ``self``.

    Args:
        self: Streaming scope object. It must already expose ``_timestamps``
            (list), ``_start_time`` and ``_llmresponse``; this function
            writes ``_ttft``, ``_finish_reason``, ``_input_tokens``,
            ``_output_tokens``, ``_response_id`` and ``_response_model``.
        chunk: Raw streamed chunk, converted with ``response_as_dict``.
    """

    end_time = time.time()
    # Record the timestamp for the current chunk
    self._timestamps.append(end_time)

    if len(self._timestamps) == 1:
        # Calculate time to first chunk
        self._ttft = calculate_ttft(self._timestamps, self._start_time)

    chunked = response_as_dict(chunk)

    # A chunk may carry no choices at all (for instance one that only reports
    # usage), so never index into the list before checking it is non-empty.
    choices = chunked.get('choices') or []
    if choices:
        first_choice = choices[0]

        # Collect the aggregated response text; a missing or null delta
        # simply contributes nothing.
        delta = first_choice.get('delta') or {}
        if content := delta.get('content'):
            self._llmresponse += content

        finish_reason = first_choice.get('finish_reason')
        if finish_reason is not None:
            self._finish_reason = finish_reason

    usage = chunked.get('usage')
    if usage is not None:
        # Response identifiers are only captured from the chunk that
        # carries usage information.
        self._input_tokens = usage.get('prompt_tokens')
        self._response_id = chunked.get('id')
        self._response_model = chunked.get('model')
        self._output_tokens = usage.get('completion_tokens')
51
+
52
def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
                      event_provider, capture_message_content, disable_metrics, version, is_stream):
    """
    Process chat request and generate Telemetry

    Sets the span attributes, emits the prompt/completion span events and the
    per-role message and choice events, marks the span OK and, unless
    disabled, records the request metrics.

    Args:
        scope: State object for the request. Read attributes: ``_timestamps``,
            ``_kwargs``, ``_span``, ``_input_tokens``, ``_output_tokens``,
            ``_finish_reason``, ``_response_id``, ``_response_model``,
            ``_llmresponse``, ``_server_address``, ``_server_port``,
            ``_start_time``, ``_tbt``, ``_ttft`` and, when present,
            ``_tool_calls``. ``_end_time`` (and ``_tbt`` when more than one
            timestamp exists) is written here.
        pricing_info: Pricing data handed to ``get_chat_model_cost``.
        environment: Value for the deployment environment attribute.
        application_name: Value for the service name attribute.
        metrics: Mapping of metric name to instrument (indexed by keys such
            as ``'genai_requests'``).
        event_provider: Object whose ``emit`` method receives each event.
        capture_message_content: When truthy, message content is included in
            span events and event bodies.
        disable_metrics: When truthy, no metrics are recorded.
        version: Value for the SDK version attribute.
        is_stream: Value for the "is stream" request attribute.
    """

    scope._end_time = time.time()
    if len(scope._timestamps) > 1:
        scope._tbt = calculate_tbt(scope._timestamps)

    formatted_messages = extract_and_format_input(scope._kwargs.get('messages', ''))
    # NOTE(review): the fallback is an Anthropic model id although this module
    # instruments Azure AI Inference -- looks like a copy/paste default; confirm.
    request_model = scope._kwargs.get('model', 'claude-3-opus-20240229')

    cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)

    # Not every scope object is guaranteed to define `_tool_calls`; reading it
    # defensively avoids an AttributeError while building assistant/tool events.
    tool_calls = getattr(scope, '_tool_calls', None)
    first_tool_call = tool_calls[0] if tool_calls else {}

    span = scope._span
    system = SemanticConvetion.GEN_AI_SYSTEM_AZURE_AI_INFERENCE
    total_tokens = scope._input_tokens + scope._output_tokens

    # Set Span attributes (OTel Semconv)
    span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION, SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM, system)
    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL, request_model)
    span.set_attribute(SemanticConvetion.SERVER_PORT, scope._server_port)
    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, scope._kwargs.get('max_tokens', -1))
    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, scope._kwargs.get('stop', []))
    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, scope._kwargs.get('temperature', 1.0))
    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K, scope._kwargs.get('top_k', 1.0))
    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P, scope._kwargs.get('top_p', 1.0))
    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
                       scope._kwargs.get('frequency_penalty', 0.0))
    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
                       scope._kwargs.get('presence_penalty', 0.0))
    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID, scope._response_id)
    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL, scope._response_model)
    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
    span.set_attribute(SemanticConvetion.SERVER_ADDRESS, scope._server_address)

    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
                       'text' if isinstance(scope._llmresponse, str) else 'json')

    span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
    span.set_attribute(SERVICE_NAME, application_name)
    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM, is_stream)
    span.set_attribute(SemanticConvetion.GEN_AI_CLIENT_TOKEN_USAGE, total_tokens)
    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST, cost)
    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT, scope._tbt)
    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT, scope._ttft)
    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION, version)

    # To be removed once the change to log events (from span events) is complete
    prompt = concatenate_all_contents(formatted_messages)
    if capture_message_content:
        span.add_event(
            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
            attributes={
                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
            },
        )
        span.add_event(
            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
            attributes={
                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
            },
        )

    choice_event_body = {
        'finish_reason': scope._finish_reason,
        'index': 0,
        'message': {
            **({'content': scope._llmresponse} if capture_message_content else {}),
            'role': 'assistant'
        }
    }

    # Emit one event per role that actually has content in the request
    for role in ['user', 'system', 'assistant', 'tool']:
        message = formatted_messages.get(role, {})
        if not message.get('content', ''):
            continue

        body = {}
        if capture_message_content:
            body['content'] = message.get('content', '')
        body['role'] = message.get('role', [])
        if role == 'assistant':
            # Only the first tool call is reported; empty strings when absent.
            function = first_tool_call.get('function', {})
            body['tool_calls'] = {
                'function': {
                    'name': function.get('name', ''),
                    'arguments': function.get('arguments', '')
                },
                'id': first_tool_call.get('id', ''),
                'type': 'function'
            }
        if role == 'tool':
            body['id'] = first_tool_call.get('id', '')

        event = otel_event(
            name=getattr(SemanticConvetion, f'GEN_AI_{role.upper()}_MESSAGE'),
            attributes={
                SemanticConvetion.GEN_AI_SYSTEM: system
            },
            body=body
        )
        event_provider.emit(event)

    choice_event = otel_event(
        name=SemanticConvetion.GEN_AI_CHOICE,
        attributes={
            SemanticConvetion.GEN_AI_SYSTEM: system
        },
        body=choice_event_body
    )
    event_provider.emit(choice_event)

    span.set_status(Status(StatusCode.OK))

    if not disable_metrics:
        metrics_attributes = create_metrics_attributes(
            service_name=application_name,
            deployment_environment=environment,
            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
            system=system,
            request_model=request_model,
            server_address=scope._server_address,
            server_port=scope._server_port,
            response_model=scope._response_model,
        )

        metrics['genai_client_usage_tokens'].record(total_tokens, metrics_attributes)
        metrics['genai_client_operation_duration'].record(scope._end_time - scope._start_time, metrics_attributes)
        metrics['genai_server_tbt'].record(scope._tbt, metrics_attributes)
        metrics['genai_server_ttft'].record(scope._ttft, metrics_attributes)
        metrics['genai_requests'].add(1, metrics_attributes)
        metrics['genai_completion_tokens'].add(scope._output_tokens, metrics_attributes)
        metrics['genai_prompt_tokens'].add(scope._input_tokens, metrics_attributes)
        metrics['genai_cost'].record(cost, metrics_attributes)
187
+
188
def process_streaming_chat_response(self, pricing_info, environment, application_name, metrics,
                                    event_provider, capture_message_content=False, disable_metrics=False, version=''):
    """
    Generate telemetry for a streamed chat request.

    Delegates to ``common_chat_logic`` using ``self`` as the scope object and
    flags the request as streaming. Returns nothing.
    """

    common_chat_logic(
        scope=self,
        pricing_info=pricing_info,
        environment=environment,
        application_name=application_name,
        metrics=metrics,
        event_provider=event_provider,
        capture_message_content=capture_message_content,
        disable_metrics=disable_metrics,
        version=version,
        is_stream=True,
    )
196
+
197
def process_chat_response(response, request_model, pricing_info, server_port, server_address,
                          environment, application_name, metrics, event_provider, start_time,
                          span, capture_message_content=False, disable_metrics=False, version='1.0.0', **kwargs):
    """
    Process chat request and generate Telemetry

    Builds an ad-hoc scope object from a complete (non-streaming) response
    and hands it to ``common_chat_logic`` with ``is_stream=False``.

    Args:
        response: Raw response object; converted with ``response_as_dict``.
        request_model: Model requested by the caller. Used as the ``model``
            request argument when ``kwargs`` does not already contain one.
        pricing_info, environment, application_name, metrics, event_provider,
        capture_message_content, disable_metrics, version: Forwarded
            unchanged to ``common_chat_logic``.
        server_port, server_address: Stored on the scope.
        start_time: Timestamp taken before the request was issued.
        span: Span stored on the scope as ``_span``.
        **kwargs: The original request keyword arguments, stored on the scope
            as ``_kwargs``.

    Returns:
        The ``response`` object, unchanged.
    """

    self = type('GenericScope', (), {})()
    response_dict = response_as_dict(response)

    # Fall back to empty containers so a response without choices, message or
    # usage yields empty telemetry values instead of KeyError/AttributeError.
    choices = response_dict.get('choices') or [{}]
    first_choice = choices[0] or {}
    message = first_choice.get('message') or {}
    usage = response_dict.get('usage') or {}

    # Honour the explicit request_model when the request kwargs carry no
    # 'model', so the shared logic does not fall back to its built-in default.
    if request_model and 'model' not in kwargs:
        kwargs['model'] = request_model

    # pylint: disable = no-member
    self._start_time = start_time
    self._end_time = time.time()
    self._span = span
    self._llmresponse = message.get('content', '')
    # Token counts are summed later, so a missing value must become 0.
    self._input_tokens = usage.get('prompt_tokens') or 0
    self._output_tokens = usage.get('completion_tokens') or 0
    self._response_model = response_dict.get('model', '')
    self._finish_reason = first_choice.get('finish_reason', '')
    self._response_id = response_dict.get('id', '')
    # Read when assistant/tool message events are built.
    # Presumably a list of dicts after response_as_dict -- TODO confirm.
    self._tool_calls = message.get('tool_calls')
    self._timestamps = []
    # Without streaming, the whole request duration doubles as the time to
    # first token and there is no time between tokens.
    self._ttft, self._tbt = self._end_time - self._start_time, 0
    self._server_address, self._server_port = server_address, server_port
    self._kwargs = kwargs

    common_chat_logic(self, pricing_info, environment, application_name, metrics,
                      event_provider, capture_message_content, disable_metrics, version, is_stream=False)

    return response
@@ -22,6 +22,7 @@ class BedrockInstrumentor(BaseInstrumentor):
22
22
  application_name = kwargs.get("application_name", "default_application")
23
23
  environment = kwargs.get("environment", "default_environment")
24
24
  tracer = kwargs.get("tracer")
25
+ event_provider = kwargs.get('event_provider')
25
26
  metrics = kwargs.get("metrics_dict")
26
27
  pricing_info = kwargs.get("pricing_info", {})
27
28
  capture_message_content = kwargs.get("capture_message_content", False)
@@ -33,7 +34,7 @@ class BedrockInstrumentor(BaseInstrumentor):
33
34
  "botocore.client",
34
35
  "ClientCreator.create_client",
35
36
  converse(version, environment, application_name,
36
- tracer, pricing_info, capture_message_content, metrics, disable_metrics),
37
+ tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics),
37
38
  )
38
39
 
39
40
  def _uninstrument(self, **kwargs):
@@ -4,96 +4,33 @@ Module for monitoring Amazon Bedrock API calls.
4
4
 
5
5
  import logging
6
6
  import time
7
- from botocore.response import StreamingBody
8
- from botocore.exceptions import ReadTimeoutError, ResponseStreamingError
9
- from urllib3.exceptions import ProtocolError as URLLib3ProtocolError
10
- from urllib3.exceptions import ReadTimeoutError as URLLib3ReadTimeoutError
11
- from opentelemetry.trace import SpanKind, Status, StatusCode
12
- from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
7
+ from opentelemetry.trace import SpanKind
13
8
  from openlit.__helpers import (
14
- get_chat_model_cost,
15
- handle_exception,
16
- response_as_dict,
17
- create_metrics_attributes,
18
9
  set_server_address_and_port
19
10
  )
11
+ from openlit.instrumentation.bedrock.utils import (
12
+ process_chat_response,
13
+ )
20
14
  from openlit.semcov import SemanticConvetion
21
15
 
22
16
  # Initialize logger for logging potential issues and operations
23
17
  logger = logging.getLogger(__name__)
24
18
 
25
- class CustomStreamWrapper(StreamingBody):
26
- """Handle streaming responses with the ability to read multiple times."""
27
-
28
- def __init__(self, stream_source, length):
29
- super().__init__(stream_source, length)
30
- self._stream_data = None
31
- self._read_position = 0
32
-
33
- def read(self, amt=None):
34
- if self._stream_data is None:
35
- try:
36
- self._stream_data = self._raw_stream.read()
37
- except URLLib3ReadTimeoutError as error:
38
- raise ReadTimeoutError(endpoint_url=error.url, error=error) from error
39
- except URLLib3ProtocolError as error:
40
- raise ResponseStreamingError(error=error) from error
41
-
42
- self._amount_read += len(self._stream_data)
43
- if amt is None or (not self._stream_data and amt > 0):
44
- self._verify_content_length()
45
-
46
- if amt is None:
47
- data_chunk = self._stream_data[self._read_position:]
48
- else:
49
- data_start = self._read_position
50
- self._read_position += amt
51
- data_chunk = self._stream_data[data_start:self._read_position]
52
-
53
- return data_chunk
54
-
55
- def converse(version, environment, application_name, tracer,
19
+ def converse(version, environment, application_name, tracer, event_provider,
56
20
  pricing_info, capture_message_content, metrics, disable_metrics):
57
21
  """
58
- Generates a telemetry wrapper for messages to collect metrics.
59
-
60
- Args:
61
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
62
- version: The monitoring package version.
63
- environment: Deployment environment (e.g. production, staging).
64
- application_name: Name of the application using the Bedrock API.
65
- tracer: OpenTelemetry tracer for creating spans.
66
- pricing_info: Information for calculating Bedrock usage cost.
67
- capture_message_content: Whether to trace the actual content.
68
- metrics: Metrics collector.
69
- disable_metrics: Flag to toggle metrics collection.
70
- Returns:
71
- A function that wraps the chat method to add telemetry.
22
+ Generates a telemetry wrapper for GenAI function call
72
23
  """
73
24
 
74
25
  def wrapper(wrapped, instance, args, kwargs):
75
26
  """
76
- Wraps an API call to add telemetry.
77
-
78
- Args:
79
- wrapped: Original method.
80
- instance: Instance of the class.
81
- args: Positional arguments of the 'messages' method.
82
- kwargs: Keyword arguments of the 'messages' method.
83
- Returns:
84
- Response from the original method.
27
+ Wraps the GenAI function call.
85
28
  """
86
29
 
87
30
  def converse_wrapper(original_method, *method_args, **method_kwargs):
88
- """
89
- Adds instrumentation to the invoke model call.
90
31
 
91
- Args:
92
- original_method: The original invoke model method.
93
- *method_args: Positional arguments for the method.
94
- **method_kwargs: Keyword arguments for the method.
95
- Returns:
96
- The modified response with telemetry.
32
+ """
33
+ Wraps the GenAI function call.
97
34
  """
98
35
 
99
36
  server_address, server_port = set_server_address_and_port(instance, 'aws.amazon.com', 443)
@@ -104,146 +41,27 @@ def converse(version, environment, application_name, tracer,
104
41
  with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
105
42
  start_time = time.time()
106
43
  response = original_method(*method_args, **method_kwargs)
107
- end_time = time.time()
108
-
109
- response_dict = response_as_dict(response)
110
-
111
- try:
112
- message_prompt = method_kwargs.get('messages', '')
113
- formatted_messages = []
114
- for message in message_prompt:
115
- role = message['role']
116
- content = message['content']
117
-
118
- if isinstance(content, list):
119
- content_str = ", ".join(f'text: {item["text"]}' for item in content if "text" in item)
120
- formatted_messages.append(f'{role}: {content_str}')
121
- else:
122
- formatted_messages.append(f'{role}: {content}')
123
- prompt = '\n'.join(formatted_messages)
124
-
125
- input_tokens = response_dict.get('usage').get('inputTokens')
126
- output_tokens = response_dict.get('usage').get('outputTokens')
127
-
128
- # Calculate cost of the operation
129
- cost = get_chat_model_cost(request_model, pricing_info,
130
- input_tokens, output_tokens)
131
-
132
- llm_response = response_dict.get('output').get('message').get('content')[0].get('text')
133
-
134
- # Set base span attribues (OTel Semconv)
135
- span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
136
- span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
137
- SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
138
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
139
- SemanticConvetion.GEN_AI_SYSTEM_AWS_BEDROCK)
140
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
141
- request_model)
142
- span.set_attribute(SemanticConvetion.SERVER_PORT,
143
- server_port)
144
-
145
- inference_config = method_kwargs.get('inferenceConfig', {})
146
-
147
- # List of attributes and their config keys
148
- attributes = [
149
- (SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequencyPenalty'),
150
- (SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, 'maxTokens'),
151
- (SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presencePenalty'),
152
- (SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, 'stopSequences'),
153
- (SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
154
- (SemanticConvetion.GEN_AI_REQUEST_TOP_P, 'topP'),
155
- (SemanticConvetion.GEN_AI_REQUEST_TOP_K, 'topK'),
156
- ]
157
-
158
- # Set each attribute if the corresponding value exists and is not None
159
- for attribute, key in attributes:
160
- value = inference_config.get(key)
161
- if value is not None:
162
- span.set_attribute(attribute, value)
163
-
164
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
165
- response_dict.get('ResponseMetadata').get('RequestId'))
166
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
167
- request_model)
168
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
169
- input_tokens)
170
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
171
- output_tokens)
172
- span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
173
- server_address)
174
- if isinstance(llm_response, str):
175
- span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
176
- 'text')
177
- else:
178
- span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
179
- 'json')
180
-
181
- # Set base span attribues (Extras)
182
- span.set_attribute(DEPLOYMENT_ENVIRONMENT,
183
- environment)
184
- span.set_attribute(SERVICE_NAME,
185
- application_name)
186
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
187
- False)
188
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
189
- input_tokens + output_tokens)
190
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
191
- cost)
192
- span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
193
- end_time - start_time)
194
- span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
195
- version)
196
-
197
- if capture_message_content:
198
- span.add_event(
199
- name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
200
- attributes={
201
- SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
202
- },
203
- )
204
- span.add_event(
205
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
206
- attributes={
207
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llm_response,
208
- },
209
- )
210
-
211
- span.set_status(Status(StatusCode.OK))
212
-
213
- if disable_metrics is False:
214
- attributes = create_metrics_attributes(
215
- service_name=application_name,
216
- deployment_environment=environment,
217
- operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
218
- system=SemanticConvetion.GEN_AI_SYSTEM_AWS_BEDROCK,
219
- request_model=request_model,
220
- server_address=server_address,
221
- server_port=server_port,
222
- response_model=request_model,
223
- )
224
-
225
- metrics['genai_client_usage_tokens'].record(
226
- input_tokens + output_tokens, attributes
227
- )
228
- metrics['genai_client_operation_duration'].record(
229
- end_time - start_time, attributes
230
- )
231
- metrics['genai_server_ttft'].record(
232
- end_time - start_time, attributes
233
- )
234
- metrics['genai_requests'].add(1, attributes)
235
- metrics['genai_completion_tokens'].add(output_tokens, attributes)
236
- metrics['genai_prompt_tokens'].add(input_tokens, attributes)
237
- metrics['genai_cost'].record(cost, attributes)
238
-
239
- return response
240
-
241
- except Exception as e:
242
- handle_exception(span, e)
243
- logger.error('Error in trace creation: %s', e)
244
-
245
- # Return original response
246
- return response
44
+ llm_config = method_kwargs.get('inferenceConfig', {})
45
+ response = process_chat_response(
46
+ response=response,
47
+ request_model=request_model,
48
+ pricing_info=pricing_info,
49
+ server_port=server_port,
50
+ server_address=server_address,
51
+ environment=environment,
52
+ application_name=application_name,
53
+ metrics=metrics,
54
+ event_provider=event_provider,
55
+ start_time=start_time,
56
+ span=span,
57
+ capture_message_content=capture_message_content,
58
+ disable_metrics=disable_metrics,
59
+ version=version,
60
+ llm_config=llm_config,
61
+ **method_kwargs
62
+ )
63
+
64
+ return response
247
65
 
248
66
  # Get the original client instance from the wrapper
249
67
  client = wrapped(*args, **kwargs)