openlit 1.34.18__py3-none-any.whl → 1.34.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their respective public registries.
@@ -2,64 +2,65 @@
 Module for monitoring Amazon Bedrock API calls.
 """
 
-import logging
 import time
 from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
+    handle_exception,
     set_server_address_and_port
 )
 from openlit.instrumentation.bedrock.utils import (
+    process_chunk,
     process_chat_response,
+    process_streaming_chat_response,
 )
 from openlit.semcov import SemanticConvention
 
-# Initialize logger for logging potential issues and operations
-logger = logging.getLogger(__name__)
-
-def converse(version, environment, application_name, tracer, event_provider,
-    pricing_info, capture_message_content, metrics, disable_metrics):
+def converse(version, environment, application_name, tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for GenAI function call
+    Generates a telemetry wrapper for AWS Bedrock converse calls.
     """
 
     def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the GenAI function call.
+        Wraps the ClientCreator.create_client call.
         """
 
         def converse_wrapper(original_method, *method_args, **method_kwargs):
-
             """
-            Wraps the GenAI function call.
+            Wraps the individual converse method call.
            """
 
-            server_address, server_port = set_server_address_and_port(instance, 'aws.amazon.com', 443)
-            request_model = method_kwargs.get('modelId', 'amazon.titan-text-express-v1')
+            server_address, server_port = set_server_address_and_port(instance, "aws.amazon.com", 443)
+            request_model = method_kwargs.get("modelId", "amazon.titan-text-express-v1")
 
-            span_name = f'{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}'
+            span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
             with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
                 start_time = time.time()
                 response = original_method(*method_args, **method_kwargs)
-                llm_config = method_kwargs.get('inferenceConfig', {})
-                response = process_chat_response(
-                    response=response,
-                    request_model=request_model,
-                    pricing_info=pricing_info,
-                    server_port=server_port,
-                    server_address=server_address,
-                    environment=environment,
-                    application_name=application_name,
-                    metrics=metrics,
-                    event_provider=event_provider,
-                    start_time=start_time,
-                    span=span,
-                    capture_message_content=capture_message_content,
-                    disable_metrics=disable_metrics,
-                    version=version,
-                    llm_config=llm_config,
-                    **method_kwargs
-                )
+                llm_config = method_kwargs.get("inferenceConfig", {})
+
+                try:
+                    response = process_chat_response(
+                        response=response,
+                        request_model=request_model,
+                        pricing_info=pricing_info,
+                        server_port=server_port,
+                        server_address=server_address,
+                        environment=environment,
+                        application_name=application_name,
+                        metrics=metrics,
+                        start_time=start_time,
+                        span=span,
+                        capture_message_content=capture_message_content,
+                        disable_metrics=disable_metrics,
+                        version=version,
+                        llm_config=llm_config,
+                        **method_kwargs
+                    )
+
+                except Exception as e:
+                    handle_exception(span, e)
 
                 return response
 
@@ -67,10 +68,143 @@ def converse(version, environment, application_name, tracer, event_provider,
         client = wrapped(*args, **kwargs)
 
         # Replace the original method with the instrumented one
-        if kwargs.get('service_name') == 'bedrock-runtime':
+        if kwargs.get("service_name") == "bedrock-runtime":
             original_invoke_model = client.converse
-            client.converse = lambda *args, **kwargs: converse_wrapper(original_invoke_model,
-                *args, **kwargs)
+            client.converse = lambda *args, **kwargs: converse_wrapper(original_invoke_model, *args, **kwargs)
+
+        return client
+
+    return wrapper
+
+def converse_stream(version, environment, application_name, tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+    """
+    Generates a telemetry wrapper for AWS Bedrock converse_stream calls.
+    """
+
+    class TracedSyncStream:
+        """
+        Wrapper for streaming responses to collect telemetry.
+        """
+
+        def __init__(
+            self,
+            wrapped_response,
+            span,
+            span_name,
+            kwargs,
+            server_address,
+            server_port,
+            **args,
+        ):
+            self.__wrapped_response = wrapped_response
+            # Extract the actual stream iterator from the response
+            if isinstance(wrapped_response, dict) and "stream" in wrapped_response:
+                self.__wrapped_stream = iter(wrapped_response["stream"])
+            else:
+                self.__wrapped_stream = iter(wrapped_response)
+
+            self._span = span
+            self._span_name = span_name
+            self._llmresponse = ""
+            self._response_id = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._tools = None
+            self._input_tokens = 0
+            self._output_tokens = 0
+
+            self._args = args
+            self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
+
+        def __enter__(self):
+            if hasattr(self.__wrapped_stream, "__enter__"):
+                self.__wrapped_stream.__enter__()
+            return self
+
+        def __exit__(self, exc_type, exc_value, traceback):
+            if hasattr(self.__wrapped_stream, "__exit__"):
+                self.__wrapped_stream.__exit__(exc_type, exc_value, traceback)
+
+        def __iter__(self):
+            return self
+
+        def __getattr__(self, name):
+            """Delegate attribute access to the wrapped response."""
+            return getattr(self.__wrapped_response, name)
+
+        def get(self, key, default=None):
+            """Delegate get method to the wrapped response if its a dict."""
+            if isinstance(self.__wrapped_response, dict):
+                return self.__wrapped_response.get(key, default)
+            return getattr(self.__wrapped_response, key, default)
+
+        def __getitem__(self, key):
+            """Delegate item access to the wrapped response if its a dict."""
+            if isinstance(self.__wrapped_response, dict):
+                return self.__wrapped_response[key]
+            return getattr(self.__wrapped_response, key)
+
+        def __next__(self):
+            try:
+                chunk = next(self.__wrapped_stream)
+                process_chunk(self, chunk)
+                return chunk
+            except StopIteration:
+                try:
+                    llm_config = self._kwargs.get("inferenceConfig", {})
+                    with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
+                        process_streaming_chat_response(
+                            self,
+                            pricing_info=pricing_info,
+                            environment=environment,
+                            application_name=application_name,
+                            metrics=metrics,
+                            capture_message_content=capture_message_content,
+                            disable_metrics=disable_metrics,
+                            version=version,
+                            llm_config=llm_config
+                        )
+
+                except Exception as e:
+                    handle_exception(self._span, e)
+
+                raise
+
+    def wrapper(wrapped, instance, args, kwargs):
+        """
+        Wraps the ClientCreator.create_client call.
+        """
+
+        def converse_stream_wrapper(original_method, *method_args, **method_kwargs):
+            """
+            Wraps the individual converse_stream method call.
+            """
+
+            server_address, server_port = set_server_address_and_port(instance, "aws.amazon.com", 443)
+            request_model = method_kwargs.get("modelId", "amazon.titan-text-express-v1")
+
+            span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+
+            # Get the streaming response
+            stream_response = original_method(*method_args, **method_kwargs)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
+
+            return TracedSyncStream(stream_response, span, span_name, method_kwargs, server_address, server_port)
+
+        # Get the original client instance from the wrapper
+        client = wrapped(*args, **kwargs)
+
+        # Replace the original method with the instrumented one
+        if kwargs.get("service_name") == "bedrock-runtime":
+            original_stream_model = client.converse_stream
+            client.converse_stream = lambda *args, **kwargs: converse_stream_wrapper(original_stream_model, *args, **kwargs)
 
         return client
 
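For context, a rough caller-side sketch of the path these wrappers instrument. It assumes openlit.init() has been called so that botocore client creation is patched as in the hunks above; the region, model ID, prompt, and inference settings are illustrative only.

    # Hypothetical usage sketch (not part of the diff).
    import boto3
    import openlit

    openlit.init()  # assumed to install the converse/converse_stream wrappers shown above

    client = boto3.client("bedrock-runtime", region_name="us-east-1")

    # Non-streaming call: routed through converse_wrapper -> process_chat_response.
    reply = client.converse(
        modelId="amazon.titan-text-express-v1",
        messages=[{"role": "user", "content": [{"text": "Hello"}]}],
        inferenceConfig={"maxTokens": 128, "temperature": 0.2},
    )

    # Streaming call: the response is wrapped in TracedSyncStream, which records
    # telemetry as the event stream is consumed.
    streaming = client.converse_stream(
        modelId="amazon.titan-text-express-v1",
        messages=[{"role": "user", "content": [{"text": "Hello"}]}],
    )
    for event in streaming:  # TracedSyncStream is iterable; each chunk goes through process_chunk
        pass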
@@ -3,21 +3,55 @@ AWS Bedrock OpenTelemetry instrumentation utility functions
 """
 import time
 
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from opentelemetry.trace import Status, StatusCode
 
 from openlit.__helpers import (
     calculate_ttft,
     response_as_dict,
     calculate_tbt,
-    extract_and_format_input,
     get_chat_model_cost,
-    create_metrics_attributes,
-    otel_event,
-    concatenate_all_contents
+    record_completion_metrics,
+    common_span_attributes,
+    handle_exception
 )
 from openlit.semcov import SemanticConvention
 
+def format_content(messages):
+    """
+    Format the messages into a string for span events.
+    """
+
+    if not messages:
+        return ""
+
+    formatted_messages = []
+    for message in messages:
+        if isinstance(message, dict):
+            role = message.get("role", "user")
+            content = message.get("content", "")
+        else:
+            # Handle Bedrock object format
+            role = getattr(message, "role", "user")
+            content = getattr(message, "content", "")
+
+        if isinstance(content, list):
+            # Handle structured content (e.g., text + images)
+            text_parts = []
+            for part in content:
+                if isinstance(part, dict):
+                    # Bedrock format: {"text": "content"} or generic format: {"type": "text", "text": "content"}
+                    if "text" in part:
+                        text_parts.append(part.get("text", ""))
+                    elif part.get("type") == "text":
+                        text_parts.append(part.get("text", ""))
+            content = " ".join(text_parts)
+        elif not isinstance(content, str):
+            content = str(content)
+
+        formatted_messages.append(f"{role}: {content}")
+
+    return "\n".join(formatted_messages)
+
 def process_chunk(self, chunk):
     """
     Process a chunk of response data and update state.
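To illustrate the new format_content helper added above, a small sketch of its input and output on a Converse-style message list (values made up for the example):

    # Hypothetical input; format_content flattens each message into one "role: text" line.
    messages = [
        {"role": "user", "content": [{"text": "What is OpenTelemetry?"}]},
        {"role": "assistant", "content": "An observability framework."},
    ]
    # format_content(messages) returns:
    # "user: What is OpenTelemetry?\nassistant: An observability framework."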
@@ -33,33 +67,33 @@ def process_chunk(self, chunk):
 
     chunked = response_as_dict(chunk)
 
-    # Collect message IDs and input token from events
-    if chunked.get('type') == 'message_start':
-        self._response_id = chunked.get('message').get('id')
-        self._input_tokens = chunked.get('message').get('usage').get('input_tokens')
-        self._response_model = chunked.get('message').get('model')
-        self._response_role = chunked.get('message').get('role')
-
-    # Collect message IDs and aggregated response from events
-    if chunked.get('type') == 'content_block_delta':
-        if chunked.get('delta').get('text'):
-            self._llmresponse += chunked.get('delta').get('text')
-        elif chunked.get('delta').get('partial_json'):
-            self._tool_arguments += chunked.get('delta').get('partial_json')
-
-    if chunked.get('type') == 'content_block_start':
-        if chunked.get('content_block').get('id'):
-            self._tool_id = chunked.get('content_block').get('id')
-        if chunked.get('content_block').get('name'):
-            self._tool_name = chunked.get('content_block').get('name')
-
-    # Collect output tokens and stop reason from events
-    if chunked.get('type') == 'message_delta':
-        self._output_tokens = chunked.get('usage').get('output_tokens')
-        self._finish_reason = chunked.get('delta').get('stop_reason')
+    # Handle Bedrock messageStart event
+    if "messageStart" in chunked:
+        message_start = chunked.get("messageStart", {})
+        self._response_role = message_start.get("role", "assistant")
+
+    # Handle Bedrock contentBlockDelta event
+    if "contentBlockDelta" in chunked:
+        content_delta = chunked.get("contentBlockDelta", {})
+        delta = content_delta.get("delta", {})
+        if "text" in delta:
+            self._llmresponse += delta.get("text", "")
+
+    # Handle Bedrock messageStop event
+    if "messageStop" in chunked:
+        message_stop = chunked.get("messageStop", {})
+        self._finish_reason = message_stop.get("stopReason", "")
+
+    # Handle Bedrock metadata event (final event with usage info)
+    if "metadata" in chunked:
+        metadata = chunked.get("metadata", {})
+        usage = metadata.get("usage", {})
+        self._input_tokens = usage.get("inputTokens", 0)
+        self._output_tokens = usage.get("outputTokens", 0)
+        self._end_time = end_time
 
 def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
-    event_provider, capture_message_content, disable_metrics, version, llm_config, is_stream):
+    capture_message_content, disable_metrics, version, llm_config, is_stream):
     """
     Process chat request and generate Telemetry
     """
@@ -68,62 +102,55 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     if len(scope._timestamps) > 1:
         scope._tbt = calculate_tbt(scope._timestamps)
 
-    formatted_messages = extract_and_format_input(scope._kwargs.get('messages', ''))
-    print(formatted_messages)
-    request_model = scope._kwargs.get('model', 'claude-3-opus-20240229')
+    formatted_messages = format_content(scope._kwargs.get("messages", []))
+    request_model = scope._kwargs.get("modelId", "amazon.titan-text-express-v1")
 
     cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
 
-    # Set Span attributes (OTel Semconv)
-    scope._span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_AWS_BEDROCK)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
-    scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
-
-    # List of attributes and their config keys
-    attributes = [
-        (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequencyPenalty'),
-        (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'maxTokens'),
-        (SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presencePenalty'),
-        (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stopSequences'),
-        (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
-        (SemanticConvention.GEN_AI_REQUEST_TOP_P, 'topP'),
-        (SemanticConvention.GEN_AI_REQUEST_TOP_K, 'topK'),
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_AWS_BEDROCK,
+        scope._server_address, scope._server_port, request_model, scope._response_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+    # Bedrock-specific attributes from llm_config
+    bedrock_attributes = [
+        (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, "frequencyPenalty"),
+        (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, "maxTokens"),
+        (SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, "presencePenalty"),
+        (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, "stopSequences"),
+        (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, "temperature"),
+        (SemanticConvention.GEN_AI_REQUEST_TOP_P, "topP"),
+        (SemanticConvention.GEN_AI_REQUEST_TOP_K, "topK"),
     ]
 
-    # Set each attribute if the corresponding value exists and is not None
-    for attribute, key in attributes:
+    # Set each bedrock-specific attribute if the corresponding value exists and is not None
+    for attribute, key in bedrock_attributes:
        value = llm_config.get(key)
        if value is not None:
            scope._span.set_attribute(attribute, value)
 
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+    # Span Attributes for Response parameters
     scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._response_model)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")
+
+    # Span Attributes for Cost and Tokens
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
-    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
-
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-        'text' if isinstance(scope._llmresponse, str) else 'json')
-
-    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
-    scope._span.set_attribute(SERVICE_NAME, application_name)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
     scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
 
-    # To be removed one the change to log events (from span events) is complete
-    prompt = concatenate_all_contents(formatted_messages)
+    # Span Attributes for Content
    if capture_message_content:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, formatted_messages)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+
+        # To be removed once the change to span_attributes (from span events) is complete
        scope._span.add_event(
            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
            attributes={
-                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: formatted_messages,
            },
        )
        scope._span.add_event(
@@ -133,120 +160,64 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
            },
        )
 
-    choice_event_body = {
-        'finish_reason': scope._finish_reason,
-        'index': 0,
-        'message': {
-            **({'content': scope._llmresponse} if capture_message_content else {}),
-            'role': scope._response_role
-        }
-    }
-
-    # Emit events
-    for role in ['user', 'system', 'assistant', 'tool']:
-        if formatted_messages.get(role, {}).get('content', ''):
-            event = otel_event(
-                name=getattr(SemanticConvention, f'GEN_AI_{role.upper()}_MESSAGE'),
-                attributes={
-                    SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_AWS_BEDROCK
-                },
-                body = {
-                    # pylint: disable=line-too-long
-                    **({'content': formatted_messages.get(role, {}).get('content', '')} if capture_message_content else {}),
-                    'role': formatted_messages.get(role, {}).get('role', []),
-                    **({
-                        'tool_calls': {
-                            'function': {
-                                # pylint: disable=line-too-long
-                                'name': (scope._tool_calls[0].get('function', {}).get('name', '') if scope._tool_calls else ''),
-                                'arguments': (scope._tool_calls[0].get('function', {}).get('arguments', '') if scope._tool_calls else '')
-                            },
-                            'id': (scope._tool_calls[0].get('id', '') if scope._tool_calls else ''),
-                            'type': 'function'
-                        }
-                    } if role == 'assistant' else {}),
-                    **({
-                        'id': (scope._tool_calls[0].get('id', '') if scope._tool_calls else '')
-                    } if role == 'tool' else {})
-                }
-            )
-            event_provider.emit(event)
-
-    choice_event = otel_event(
-        name=SemanticConvention.GEN_AI_CHOICE,
-        attributes={
-            SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_AWS_BEDROCK
-        },
-        body=choice_event_body
-    )
-    event_provider.emit(choice_event)
-
     scope._span.set_status(Status(StatusCode.OK))
 
+    # Record metrics
     if not disable_metrics:
-        metrics_attributes = create_metrics_attributes(
-            service_name=application_name,
-            deployment_environment=environment,
-            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-            system=SemanticConvention.GEN_AI_SYSTEM_AWS_BEDROCK,
-            request_model=request_model,
-            server_address=scope._server_address,
-            server_port=scope._server_port,
-            response_model=scope._response_model,
-        )
-
-        metrics['genai_client_usage_tokens'].record(scope._input_tokens + scope._output_tokens, metrics_attributes)
-        metrics['genai_client_operation_duration'].record(scope._end_time - scope._start_time, metrics_attributes)
-        metrics['genai_server_tbt'].record(scope._tbt, metrics_attributes)
-        metrics['genai_server_ttft'].record(scope._ttft, metrics_attributes)
-        metrics['genai_requests'].add(1, metrics_attributes)
-        metrics['genai_completion_tokens'].add(scope._output_tokens, metrics_attributes)
-        metrics['genai_prompt_tokens'].add(scope._input_tokens, metrics_attributes)
-        metrics['genai_cost'].record(cost, metrics_attributes)
-
-def process_streaming_chat_response(self, pricing_info, environment, application_name, metrics,
-    event_provider, capture_message_content=False, disable_metrics=False, version='', llm_config=''):
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_AWS_BEDROCK,
+            scope._server_address, scope._server_port, request_model, scope._response_model, environment,
+            application_name, scope._start_time, scope._end_time, scope._input_tokens, scope._output_tokens,
+            cost, scope._tbt, scope._ttft)
 
+def process_streaming_chat_response(scope, pricing_info, environment, application_name, metrics,
+    capture_message_content=False, disable_metrics=False, version="", llm_config=None):
     """
-    Process chat request and generate Telemetry
+    Process streaming chat response and generate telemetry.
     """
-    if self._tool_id != '':
-        self._tool_calls = {
-            'id': self._tool_id,
-            'name': self._tool_name,
-            'input': self._tool_arguments
-        }
 
-    common_chat_logic(self, pricing_info, environment, application_name, metrics,
-        event_provider, capture_message_content, disable_metrics, version, llm_config, is_stream=True)
+    try:
+        if llm_config is None:
+            llm_config = {}
 
-def process_chat_response(response, request_model, pricing_info, server_port, server_address, environment,
-    application_name, metrics, event_provider, start_time, span, capture_message_content=False,
-    disable_metrics=False, version='1.0.0', llm_config='', **kwargs):
+        common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+            capture_message_content, disable_metrics, version, llm_config, is_stream=True)
+    except Exception as e:
+        handle_exception(scope._span, e)
+        raise
 
+def process_chat_response(response, request_model, pricing_info, server_port, server_address, environment,
+    application_name, metrics, start_time, span, capture_message_content=False,
+    disable_metrics=False, version="1.0.0", llm_config=None, **kwargs):
     """
-    Process chat request and generate Telemetry
+    Process non-streaming chat response and generate telemetry.
     """
 
-    self = type('GenericScope', (), {})()
-    response_dict = response_as_dict(response)
-
-    # pylint: disable = no-member
-    self._start_time = start_time
-    self._end_time = time.time()
-    self._span = span
-    self._llmresponse = response_dict.get('output').get('message').get('content')[0].get('text')
-    self._response_role = 'assistant'
-    self._input_tokens = response_dict.get('usage').get('inputTokens')
-    self._output_tokens = response_dict.get('usage').get('outputTokens')
-    self._response_model = request_model
-    self._finish_reason = response_dict.get('stopReason', '')
-    self._response_id = response_dict.get('ResponseMetadata').get('RequestId')
-    self._timestamps = []
-    self._ttft, self._tbt = self._end_time - self._start_time, 0
-    self._server_address, self._server_port = server_address, server_port
-    self._kwargs = kwargs
-    common_chat_logic(self, pricing_info, environment, application_name, metrics,
-        event_provider, capture_message_content, disable_metrics, version, llm_config, is_stream=False)
-
-    return response
+    try:
+        if llm_config is None:
+            llm_config = {}
+
+        scope = type("GenericScope", (), {})()
+        response_dict = response_as_dict(response)
+
+        scope._start_time = start_time
+        scope._end_time = time.time()
+        scope._span = span
+        scope._llmresponse = response_dict.get("output", {}).get("message", {}).get("content", [{}])[0].get("text", "")
+        scope._response_role = response_dict.get("output", {}).get("message", {}).get("role", "assistant")
+        scope._input_tokens = response_dict.get("usage", {}).get("inputTokens", 0)
+        scope._output_tokens = response_dict.get("usage", {}).get("outputTokens", 0)
+        scope._response_model = request_model
+        scope._finish_reason = response_dict.get("stopReason", "")
+        scope._response_id = response_dict.get("RequestId", "")
+        scope._timestamps = []
+        scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+        scope._server_address, scope._server_port = server_address, server_port
+        scope._kwargs = kwargs
+
+        common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+            capture_message_content, disable_metrics, version, llm_config, is_stream=False)
+
+        return response
+    except Exception as e:
+        handle_exception(span, e)
+        raise
@@ -68,9 +68,9 @@ def acompletion(version, environment, application_name, tracer, pricing_info,
         def __aiter__(self):
             return self
 
-        def __getattr__(self, name):
+        async def __getattr__(self, name):
             """Delegate attribute access to the wrapped object."""
-            return getattr(self.__wrapped__, name)
+            return getattr(await self.__wrapped__, name)
 
         async def __anext__(self):
             try:
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openlit
-Version: 1.34.18
+Version: 1.34.20
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
 License: Apache-2.0
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu