openlit 1.33.18__py3-none-any.whl → 1.33.20__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
Files changed (80)
  1. openlit/__helpers.py +11 -41
  2. openlit/__init__.py +3 -3
  3. openlit/evals/utils.py +7 -7
  4. openlit/guard/utils.py +7 -7
  5. openlit/instrumentation/ag2/ag2.py +24 -24
  6. openlit/instrumentation/ai21/ai21.py +3 -3
  7. openlit/instrumentation/ai21/async_ai21.py +3 -3
  8. openlit/instrumentation/ai21/utils.py +59 -59
  9. openlit/instrumentation/anthropic/anthropic.py +2 -2
  10. openlit/instrumentation/anthropic/async_anthropic.py +2 -2
  11. openlit/instrumentation/anthropic/utils.py +34 -34
  12. openlit/instrumentation/assemblyai/assemblyai.py +24 -24
  13. openlit/instrumentation/astra/astra.py +3 -3
  14. openlit/instrumentation/astra/async_astra.py +3 -3
  15. openlit/instrumentation/astra/utils.py +39 -39
  16. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +10 -10
  17. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +10 -10
  18. openlit/instrumentation/azure_ai_inference/utils.py +38 -38
  19. openlit/instrumentation/bedrock/__init__.py +2 -1
  20. openlit/instrumentation/bedrock/bedrock.py +32 -214
  21. openlit/instrumentation/bedrock/utils.py +252 -0
  22. openlit/instrumentation/chroma/chroma.py +57 -57
  23. openlit/instrumentation/cohere/async_cohere.py +88 -88
  24. openlit/instrumentation/cohere/cohere.py +88 -88
  25. openlit/instrumentation/controlflow/controlflow.py +15 -15
  26. openlit/instrumentation/crawl4ai/async_crawl4ai.py +14 -14
  27. openlit/instrumentation/crawl4ai/crawl4ai.py +14 -14
  28. openlit/instrumentation/crewai/crewai.py +22 -22
  29. openlit/instrumentation/dynamiq/dynamiq.py +19 -19
  30. openlit/instrumentation/elevenlabs/async_elevenlabs.py +24 -25
  31. openlit/instrumentation/elevenlabs/elevenlabs.py +23 -25
  32. openlit/instrumentation/embedchain/embedchain.py +15 -15
  33. openlit/instrumentation/firecrawl/firecrawl.py +10 -10
  34. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +33 -33
  35. openlit/instrumentation/google_ai_studio/google_ai_studio.py +33 -33
  36. openlit/instrumentation/gpt4all/gpt4all.py +78 -78
  37. openlit/instrumentation/gpu/__init__.py +8 -8
  38. openlit/instrumentation/groq/async_groq.py +74 -74
  39. openlit/instrumentation/groq/groq.py +74 -74
  40. openlit/instrumentation/haystack/haystack.py +6 -6
  41. openlit/instrumentation/julep/async_julep.py +14 -14
  42. openlit/instrumentation/julep/julep.py +14 -14
  43. openlit/instrumentation/langchain/async_langchain.py +39 -39
  44. openlit/instrumentation/langchain/langchain.py +39 -39
  45. openlit/instrumentation/letta/letta.py +26 -26
  46. openlit/instrumentation/litellm/async_litellm.py +94 -94
  47. openlit/instrumentation/litellm/litellm.py +94 -94
  48. openlit/instrumentation/llamaindex/llamaindex.py +7 -7
  49. openlit/instrumentation/mem0/mem0.py +13 -13
  50. openlit/instrumentation/milvus/milvus.py +47 -47
  51. openlit/instrumentation/mistral/async_mistral.py +88 -88
  52. openlit/instrumentation/mistral/mistral.py +88 -88
  53. openlit/instrumentation/multion/async_multion.py +21 -21
  54. openlit/instrumentation/multion/multion.py +21 -21
  55. openlit/instrumentation/ollama/async_ollama.py +3 -3
  56. openlit/instrumentation/ollama/ollama.py +3 -3
  57. openlit/instrumentation/ollama/utils.py +50 -50
  58. openlit/instrumentation/openai/async_openai.py +225 -225
  59. openlit/instrumentation/openai/openai.py +225 -225
  60. openlit/instrumentation/openai_agents/openai_agents.py +11 -11
  61. openlit/instrumentation/phidata/phidata.py +15 -15
  62. openlit/instrumentation/pinecone/pinecone.py +43 -43
  63. openlit/instrumentation/premai/premai.py +86 -86
  64. openlit/instrumentation/qdrant/async_qdrant.py +95 -95
  65. openlit/instrumentation/qdrant/qdrant.py +99 -99
  66. openlit/instrumentation/reka/async_reka.py +33 -33
  67. openlit/instrumentation/reka/reka.py +33 -33
  68. openlit/instrumentation/together/async_together.py +90 -90
  69. openlit/instrumentation/together/together.py +90 -90
  70. openlit/instrumentation/transformers/transformers.py +26 -26
  71. openlit/instrumentation/vertexai/async_vertexai.py +64 -64
  72. openlit/instrumentation/vertexai/vertexai.py +64 -64
  73. openlit/instrumentation/vllm/vllm.py +24 -24
  74. openlit/otel/metrics.py +11 -11
  75. openlit/semcov/__init__.py +3 -3
  76. {openlit-1.33.18.dist-info → openlit-1.33.20.dist-info}/METADATA +8 -8
  77. openlit-1.33.20.dist-info/RECORD +131 -0
  78. {openlit-1.33.18.dist-info → openlit-1.33.20.dist-info}/WHEEL +1 -1
  79. openlit-1.33.18.dist-info/RECORD +0 -130
  80. {openlit-1.33.18.dist-info → openlit-1.33.20.dist-info}/LICENSE +0 -0
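The bulk of this release is a refactor of the Amazon Bedrock instrumentation: the inline telemetry in openlit/instrumentation/bedrock/bedrock.py moves into a new shared openlit/instrumentation/bedrock/utils.py, and the misspelled SemanticConvetion import is replaced by SemanticConvention (see the hunks below). For orientation, here is a minimal sketch of the kind of application code this instrumentation wraps; it assumes the documented openlit.init() entry point and boto3's Bedrock Converse API, and all parameter values are illustrative.

    # Illustrative sketch only -- not part of the diff.
    import boto3
    import openlit

    openlit.init(application_name='my-app', environment='staging')  # patches supported client libraries

    client = boto3.client('bedrock-runtime')
    response = client.converse(
        modelId='amazon.titan-text-express-v1',                    # default model id in the instrumentation
        messages=[{'role': 'user', 'content': [{'text': 'Hello'}]}],
        inferenceConfig={'maxTokens': 256, 'temperature': 0.5},    # keys mirrored onto span attributes
    )
    print(response['output']['message']['content'][0]['text'])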
openlit/instrumentation/bedrock/bedrock.py
@@ -4,246 +4,64 @@ Module for monitoring Amazon Bedrock API calls.
 
 import logging
 import time
-from botocore.response import StreamingBody
-from botocore.exceptions import ReadTimeoutError, ResponseStreamingError
-from urllib3.exceptions import ProtocolError as URLLib3ProtocolError
-from urllib3.exceptions import ReadTimeoutError as URLLib3ReadTimeoutError
-from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
-    get_chat_model_cost,
-    handle_exception,
-    response_as_dict,
-    create_metrics_attributes,
     set_server_address_and_port
 )
-from openlit.semcov import SemanticConvetion
+from openlit.instrumentation.bedrock.utils import (
+    process_chat_response,
+)
+from openlit.semcov import SemanticConvention
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-class CustomStreamWrapper(StreamingBody):
-    """Handle streaming responses with the ability to read multiple times."""
-
-    def __init__(self, stream_source, length):
-        super().__init__(stream_source, length)
-        self._stream_data = None
-        self._read_position = 0
-
-    def read(self, amt=None):
-        if self._stream_data is None:
-            try:
-                self._stream_data = self._raw_stream.read()
-            except URLLib3ReadTimeoutError as error:
-                raise ReadTimeoutError(endpoint_url=error.url, error=error) from error
-            except URLLib3ProtocolError as error:
-                raise ResponseStreamingError(error=error) from error
-
-        self._amount_read += len(self._stream_data)
-        if amt is None or (not self._stream_data and amt > 0):
-            self._verify_content_length()
-
-        if amt is None:
-            data_chunk = self._stream_data[self._read_position:]
-        else:
-            data_start = self._read_position
-            self._read_position += amt
-            data_chunk = self._stream_data[data_start:self._read_position]
-
-        return data_chunk
-
-def converse(version, environment, application_name, tracer,
+def converse(version, environment, application_name, tracer, event_provider,
              pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for messages to collect metrics.
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: The monitoring package version.
-        environment: Deployment environment (e.g. production, staging).
-        application_name: Name of the application using the Bedrock API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information for calculating Bedrock usage cost.
-        capture_message_content: Whether to trace the actual content.
-        metrics: Metrics collector.
-        disable_metrics: Flag to toggle metrics collection.
-    Returns:
-        A function that wraps the chat method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps an API call to add telemetry.
-
-        Args:
-            wrapped: Original method.
-            instance: Instance of the class.
-            args: Positional arguments of the 'messages' method.
-            kwargs: Keyword arguments of the 'messages' method.
-        Returns:
-            Response from the original method.
+        Wraps the GenAI function call.
         """
 
         def converse_wrapper(original_method, *method_args, **method_kwargs):
-            """
-            Adds instrumentation to the invoke model call.
 
-            Args:
-                original_method: The original invoke model method.
-                *method_args: Positional arguments for the method.
-                **method_kwargs: Keyword arguments for the method.
-            Returns:
-                The modified response with telemetry.
+            """
+            Wraps the GenAI function call.
             """
 
             server_address, server_port = set_server_address_and_port(instance, 'aws.amazon.com', 443)
            request_model = method_kwargs.get('modelId', 'amazon.titan-text-express-v1')
 
-            span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}'
+            span_name = f'{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}'
 
             with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
                 start_time = time.time()
                 response = original_method(*method_args, **method_kwargs)
-                end_time = time.time()
-
-                response_dict = response_as_dict(response)
-
-                try:
-                    message_prompt = method_kwargs.get('messages', '')
-                    formatted_messages = []
-                    for message in message_prompt:
-                        role = message['role']
-                        content = message['content']
-
-                        if isinstance(content, list):
-                            content_str = ", ".join(f'text: {item["text"]}' for item in content if "text" in item)
-                            formatted_messages.append(f'{role}: {content_str}')
-                        else:
-                            formatted_messages.append(f'{role}: {content}')
-                    prompt = '\n'.join(formatted_messages)
-
-                    input_tokens = response_dict.get('usage').get('inputTokens')
-                    output_tokens = response_dict.get('usage').get('outputTokens')
-
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(request_model, pricing_info,
-                                               input_tokens, output_tokens)
-
-                    llm_response = response_dict.get('output').get('message').get('content')[0].get('text')
-
-                    # Set base span attribues (OTel Semconv)
-                    span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
-                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                       SemanticConvetion.GEN_AI_SYSTEM_AWS_BEDROCK)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       request_model)
-                    span.set_attribute(SemanticConvetion.SERVER_PORT,
-                                       server_port)
-
-                    inference_config = method_kwargs.get('inferenceConfig', {})
-
-                    # List of attributes and their config keys
-                    attributes = [
-                        (SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequencyPenalty'),
-                        (SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, 'maxTokens'),
-                        (SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presencePenalty'),
-                        (SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, 'stopSequences'),
-                        (SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
-                        (SemanticConvetion.GEN_AI_REQUEST_TOP_P, 'topP'),
-                        (SemanticConvetion.GEN_AI_REQUEST_TOP_K, 'topK'),
-                    ]
-
-                    # Set each attribute if the corresponding value exists and is not None
-                    for attribute, key in attributes:
-                        value = inference_config.get(key)
-                        if value is not None:
-                            span.set_attribute(attribute, value)
-
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                       response_dict.get('ResponseMetadata').get('RequestId'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
-                                       request_model)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                       input_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                       output_tokens)
-                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
-                                       server_address)
-                    if isinstance(llm_response, str):
-                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
-                                           'text')
-                    else:
-                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
-                                           'json')
-
-                    # Set base span attribues (Extras)
-                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                       environment)
-                    span.set_attribute(SERVICE_NAME,
-                                       application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                       False)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       input_tokens + output_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                       cost)
-                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
-                                       end_time - start_time)
-                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
-                                       version)
-
-                    if capture_message_content:
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llm_response,
-                            },
-                        )
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = create_metrics_attributes(
-                            service_name=application_name,
-                            deployment_environment=environment,
-                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            system=SemanticConvetion.GEN_AI_SYSTEM_AWS_BEDROCK,
-                            request_model=request_model,
-                            server_address=server_address,
-                            server_port=server_port,
-                            response_model=request_model,
-                        )
-
-                        metrics['genai_client_usage_tokens'].record(
-                            input_tokens + output_tokens, attributes
-                        )
-                        metrics['genai_client_operation_duration'].record(
-                            end_time - start_time, attributes
-                        )
-                        metrics['genai_server_ttft'].record(
-                            end_time - start_time, attributes
-                        )
-                        metrics['genai_requests'].add(1, attributes)
-                        metrics['genai_completion_tokens'].add(output_tokens, attributes)
-                        metrics['genai_prompt_tokens'].add(input_tokens, attributes)
-                        metrics['genai_cost'].record(cost, attributes)
-
-                    return response
-
-                except Exception as e:
-                    handle_exception(span, e)
-                    logger.error('Error in trace creation: %s', e)
-
-                    # Return original response
-                    return response
+                llm_config = method_kwargs.get('inferenceConfig', {})
+                response = process_chat_response(
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    event_provider=event_provider,
+                    start_time=start_time,
+                    span=span,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                    llm_config=llm_config,
+                    **method_kwargs
+                )
+
+                return response
 
         # Get the original client instance from the wrapper
         client = wrapped(*args, **kwargs)
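The hunk ends where wrapper obtains the real client (client = wrapped(*args, **kwargs)); the remainder of wrapper, outside this hunk, is where the client's converse method gets swapped for the traced converse_wrapper. A minimal, self-contained sketch of that rebinding pattern follows, using hypothetical names, a stand-in client, and a print in place of the real span and metric emission:

    # Sketch of the method-rebinding pattern; not the openlit implementation.
    import time

    class FakeBedrockClient:
        # Stand-in for a boto3 'bedrock-runtime' client.
        def converse(self, **kwargs):
            return {'output': {'message': {'content': [{'text': 'ok'}]}},
                    'usage': {'inputTokens': 3, 'outputTokens': 1}}

    def instrument(client):
        original_converse = client.converse
        def traced_converse(*args, **kwargs):
            start_time = time.time()
            response = original_converse(*args, **kwargs)
            # the real wrapper hands the response to process_chat_response() here
            print(f"converse model={kwargs.get('modelId')} took {time.time() - start_time:.4f}s")
            return response
        client.converse = traced_converse   # rebind on the instance
        return client

    client = instrument(FakeBedrockClient())
    client.converse(modelId='amazon.titan-text-express-v1',
                    messages=[{'role': 'user', 'content': [{'text': 'Hi'}]}])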
openlit/instrumentation/bedrock/utils.py
@@ -0,0 +1,252 @@
+"""
+AWS Bedrock OpenTelemetry instrumentation utility functions
+"""
+import time
+
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import Status, StatusCode
+
+from openlit.__helpers import (
+    calculate_ttft,
+    response_as_dict,
+    calculate_tbt,
+    extract_and_format_input,
+    get_chat_model_cost,
+    create_metrics_attributes,
+    otel_event,
+    concatenate_all_contents
+)
+from openlit.semcov import SemanticConvention
+
+def process_chunk(self, chunk):
+    """
+    Process a chunk of response data and update state.
+    """
+
+    end_time = time.time()
+    # Record the timestamp for the current chunk
+    self._timestamps.append(end_time)
+
+    if len(self._timestamps) == 1:
+        # Calculate time to first chunk
+        self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
+    chunked = response_as_dict(chunk)
+
+    # Collect message IDs and input token from events
+    if chunked.get('type') == 'message_start':
+        self._response_id = chunked.get('message').get('id')
+        self._input_tokens = chunked.get('message').get('usage').get('input_tokens')
+        self._response_model = chunked.get('message').get('model')
+        self._response_role = chunked.get('message').get('role')
+
+    # Collect message IDs and aggregated response from events
+    if chunked.get('type') == 'content_block_delta':
+        if chunked.get('delta').get('text'):
+            self._llmresponse += chunked.get('delta').get('text')
+        elif chunked.get('delta').get('partial_json'):
+            self._tool_arguments += chunked.get('delta').get('partial_json')
+
+    if chunked.get('type') == 'content_block_start':
+        if chunked.get('content_block').get('id'):
+            self._tool_id = chunked.get('content_block').get('id')
+        if chunked.get('content_block').get('name'):
+            self._tool_name = chunked.get('content_block').get('name')
+
+    # Collect output tokens and stop reason from events
+    if chunked.get('type') == 'message_delta':
+        self._output_tokens = chunked.get('usage').get('output_tokens')
+        self._finish_reason = chunked.get('delta').get('stop_reason')
+
+def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+    event_provider, capture_message_content, disable_metrics, version, llm_config, is_stream):
+    """
+    Process chat request and generate Telemetry
+    """
+
+    scope._end_time = time.time()
+    if len(scope._timestamps) > 1:
+        scope._tbt = calculate_tbt(scope._timestamps)
+
+    formatted_messages = extract_and_format_input(scope._kwargs.get('messages', ''))
+    print(formatted_messages)
+    request_model = scope._kwargs.get('model', 'claude-3-opus-20240229')
+
+    cost = get_chat_model_cost(request_model, pricing_info, scope._input_tokens, scope._output_tokens)
+
+    # Set Span attributes (OTel Semconv)
+    scope._span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OPERATION, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM, SemanticConvention.GEN_AI_SYSTEM_AWS_BEDROCK)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL, request_model)
+    scope._span.set_attribute(SemanticConvention.SERVER_PORT, scope._server_port)
+
+    # List of attributes and their config keys
+    attributes = [
+        (SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequencyPenalty'),
+        (SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, 'maxTokens'),
+        (SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presencePenalty'),
+        (SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, 'stopSequences'),
+        (SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
+        (SemanticConvention.GEN_AI_REQUEST_TOP_P, 'topP'),
+        (SemanticConvention.GEN_AI_REQUEST_TOP_K, 'topK'),
+    ]
+
+    # Set each attribute if the corresponding value exists and is not None
+    for attribute, key in attributes:
+        value = llm_config.get(key)
+        if value is not None:
+            scope._span.set_attribute(attribute, value)
+
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON, [scope._finish_reason])
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID, scope._response_id)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._response_model)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
+    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
+
+    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
+                              'text' if isinstance(scope._llmresponse, str) else 'json')
+
+    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
+    scope._span.set_attribute(SERVICE_NAME, application_name)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
+
+    # To be removed one the change to log events (from span events) is complete
+    prompt = concatenate_all_contents(formatted_messages)
+    if capture_message_content:
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
+            },
+        )
+        scope._span.add_event(
+            name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
+            attributes={
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+            },
+        )
+
+    choice_event_body = {
+        'finish_reason': scope._finish_reason,
+        'index': 0,
+        'message': {
+            **({'content': scope._llmresponse} if capture_message_content else {}),
+            'role': scope._response_role
+        }
+    }
+
+    # Emit events
+    for role in ['user', 'system', 'assistant', 'tool']:
+        if formatted_messages.get(role, {}).get('content', ''):
+            event = otel_event(
+                name=getattr(SemanticConvention, f'GEN_AI_{role.upper()}_MESSAGE'),
+                attributes={
+                    SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_AWS_BEDROCK
+                },
+                body = {
+                    # pylint: disable=line-too-long
+                    **({'content': formatted_messages.get(role, {}).get('content', '')} if capture_message_content else {}),
+                    'role': formatted_messages.get(role, {}).get('role', []),
+                    **({
+                        'tool_calls': {
+                            'function': {
+                                # pylint: disable=line-too-long
+                                'name': (scope._tool_calls[0].get('function', {}).get('name', '') if scope._tool_calls else ''),
+                                'arguments': (scope._tool_calls[0].get('function', {}).get('arguments', '') if scope._tool_calls else '')
+                            },
+                            'id': (scope._tool_calls[0].get('id', '') if scope._tool_calls else ''),
+                            'type': 'function'
+                        }
+                    } if role == 'assistant' else {}),
+                    **({
+                        'id': (scope._tool_calls[0].get('id', '') if scope._tool_calls else '')
+                    } if role == 'tool' else {})
+                }
+            )
+            event_provider.emit(event)
+
+    choice_event = otel_event(
+        name=SemanticConvention.GEN_AI_CHOICE,
+        attributes={
+            SemanticConvention.GEN_AI_SYSTEM: SemanticConvention.GEN_AI_SYSTEM_AWS_BEDROCK
+        },
+        body=choice_event_body
+    )
+    event_provider.emit(choice_event)
+
+    scope._span.set_status(Status(StatusCode.OK))
+
+    if not disable_metrics:
+        metrics_attributes = create_metrics_attributes(
+            service_name=application_name,
+            deployment_environment=environment,
+            operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            system=SemanticConvention.GEN_AI_SYSTEM_AWS_BEDROCK,
+            request_model=request_model,
+            server_address=scope._server_address,
+            server_port=scope._server_port,
+            response_model=scope._response_model,
+        )
+
+        metrics['genai_client_usage_tokens'].record(scope._input_tokens + scope._output_tokens, metrics_attributes)
+        metrics['genai_client_operation_duration'].record(scope._end_time - scope._start_time, metrics_attributes)
+        metrics['genai_server_tbt'].record(scope._tbt, metrics_attributes)
+        metrics['genai_server_ttft'].record(scope._ttft, metrics_attributes)
+        metrics['genai_requests'].add(1, metrics_attributes)
+        metrics['genai_completion_tokens'].add(scope._output_tokens, metrics_attributes)
+        metrics['genai_prompt_tokens'].add(scope._input_tokens, metrics_attributes)
+        metrics['genai_cost'].record(cost, metrics_attributes)
+
+def process_streaming_chat_response(self, pricing_info, environment, application_name, metrics,
+    event_provider, capture_message_content=False, disable_metrics=False, version='', llm_config=''):
+
+    """
+    Process chat request and generate Telemetry
+    """
+    if self._tool_id != '':
+        self._tool_calls = {
+            'id': self._tool_id,
+            'name': self._tool_name,
+            'input': self._tool_arguments
+        }
+
+    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+        event_provider, capture_message_content, disable_metrics, version, llm_config, is_stream=True)
+
+def process_chat_response(response, request_model, pricing_info, server_port, server_address, environment,
+    application_name, metrics, event_provider, start_time, span, capture_message_content=False,
+    disable_metrics=False, version='1.0.0', llm_config='', **kwargs):
+
+    """
+    Process chat request and generate Telemetry
+    """
+
+    self = type('GenericScope', (), {})()
+    response_dict = response_as_dict(response)
+
+    # pylint: disable = no-member
+    self._start_time = start_time
+    self._end_time = time.time()
+    self._span = span
+    self._llmresponse = response_dict.get('output').get('message').get('content')[0].get('text')
+    self._response_role = 'assistant'
+    self._input_tokens = response_dict.get('usage').get('inputTokens')
+    self._output_tokens = response_dict.get('usage').get('outputTokens')
+    self._response_model = request_model
+    self._finish_reason = response_dict.get('stopReason', '')
+    self._response_id = response_dict.get('ResponseMetadata').get('RequestId')
+    self._timestamps = []
+    self._ttft, self._tbt = self._end_time - self._start_time, 0
+    self._server_address, self._server_port = server_address, server_port
+    self._kwargs = kwargs
+    common_chat_logic(self, pricing_info, environment, application_name, metrics,
+        event_provider, capture_message_content, disable_metrics, version, llm_config, is_stream=False)
+
+    return response
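One idiom in the new utils.py worth noting: process_chat_response builds a throwaway scope object with type('GenericScope', (), {})() and attaches the response state to it, so common_chat_logic can read the same attributes whether it receives this ad-hoc object (non-streaming) or a stateful stream wrapper fed by process_chunk (streaming). A minimal illustration of the idiom, with made-up values:

    # Illustration of the ad-hoc scope idiom; values are made up.
    import time

    def report(scope):
        # consumer in the style of common_chat_logic: reads attributes off whatever it is given
        print(scope._response_model, scope._output_tokens,
              round(scope._end_time - scope._start_time, 6))

    scope = type('GenericScope', (), {})()   # empty class, instantiated and used as an attribute bag
    scope._start_time = time.time()
    scope._response_model = 'amazon.titan-text-express-v1'
    scope._output_tokens = 42
    scope._end_time = time.time()
    report(scope)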