openlit 1.16.0__py3-none-any.whl → 1.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -120,7 +120,7 @@ def messages(gen_ai_endpoint, version, environment, application_name, tracer,
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                    kwargs.get("model", "claude-3-sonnet-20240229"))
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                   kwargs.get("max_tokens", ""))
+                   kwargs.get("max_tokens", -1))
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                    True)
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
@@ -130,7 +130,7 @@ def messages(gen_ai_endpoint, version, environment, application_name, tracer,
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
                    kwargs.get("top_k", ""))
 span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                   finish_reason)
+                   [finish_reason])
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
                    prompt_tokens)
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
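Note: both fixes in these two hunks are about OpenTelemetry attribute typing. `max_tokens` now falls back to the integer sentinel `-1` instead of `""`, keeping the attribute numeric, and the finish reason is wrapped in a list so the attribute becomes a string array. A minimal sketch of the idea, assuming OTel GenAI semantic-convention attribute names (the `SemanticConvetion` constants resolve to similar strings; the names below are assumptions, not values read from this package):

```python
from opentelemetry import trace

tracer = trace.get_tracer("demo")
with tracer.start_as_current_span("chat") as span:
    # A numeric field keeps a numeric sentinel instead of an empty string
    span.set_attribute("gen_ai.request.max_tokens", -1)
    # Finish reasons are modeled as a string array, hence [finish_reason]
    span.set_attribute("gen_ai.response.finish_reasons", ["end_turn"])
```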
@@ -140,10 +140,18 @@ def messages(gen_ai_endpoint, version, environment, application_name, tracer,
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                    cost)
 if trace_content:
-    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                       prompt)
-    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                       llmresponse)
+    span.add_event(
+        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+        attributes={
+            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+        },
+    )
+    span.add_event(
+        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+        attributes={
+            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+        },
+    )

 span.set_status(Status(StatusCode.OK))

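Note: prompt and completion payloads move off span attributes and onto span events, so bulky message content rides as timestamped events instead of attributes. A minimal sketch of the migration, assuming a configured tracer and shortened event/attribute names:

```python
from opentelemetry import trace

tracer = trace.get_tracer("demo")
prompt, llmresponse = "Hi", "Hello!"
with tracer.start_as_current_span("anthropic.messages") as span:
    # Before: span.set_attribute("gen_ai.content.prompt", prompt)
    # After: the content is recorded as span events
    span.add_event(name="gen_ai.content.prompt",
                   attributes={"gen_ai.prompt": prompt})
    span.add_event(name="gen_ai.content.completion",
                   attributes={"gen_ai.completion": llmresponse})
```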
@@ -224,7 +232,7 @@ def messages(gen_ai_endpoint, version, environment, application_name, tracer,
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                    kwargs.get("model", "claude-3-sonnet-20240229"))
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                   kwargs.get("max_tokens", ""))
+                   kwargs.get("max_tokens", -1))
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                    False)
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
@@ -234,7 +242,7 @@ def messages(gen_ai_endpoint, version, environment, application_name, tracer,
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
                    kwargs.get("top_k", ""))
 span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                   response.stop_reason)
+                   [response.stop_reason])
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
                    response.usage.input_tokens)
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
@@ -244,11 +252,21 @@ def messages(gen_ai_endpoint, version, environment, application_name, tracer,
                    response.usage.output_tokens)
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                    cost)
+
 if trace_content:
-    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                       prompt)
-    # pylint: disable=line-too-long
-    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION, response.content[0].text if response.content else "")
+    span.add_event(
+        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+        attributes={
+            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+        },
+    )
+    span.add_event(
+        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+        attributes={
+            # pylint: disable=line-too-long
+            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.content[0].text if response.content else "",
+        },
+    )

 span.set_status(Status(StatusCode.OK))

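Note: the non-streaming path keeps its guard against an empty `content` list before indexing into the response. A self-contained illustration with stubbed stand-ins (hypothetical classes, not the Anthropic SDK types):

```python
from dataclasses import dataclass, field

@dataclass
class TextBlock:      # hypothetical stand-in for an Anthropic content block
    text: str

@dataclass
class Message:        # hypothetical stand-in for an Anthropic Message
    content: list = field(default_factory=list)

response = Message(content=[])
# Same guard as the hunk above: fall back to "" when content is empty
completion = response.content[0].text if response.content else ""
assert completion == ""
```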
@@ -120,7 +120,7 @@ def async_messages(gen_ai_endpoint, version, environment, application_name,
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                    kwargs.get("model", "claude-3-sonnet-20240229"))
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                   kwargs.get("max_tokens", ""))
+                   kwargs.get("max_tokens", -1))
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                    True)
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
@@ -130,7 +130,7 @@ def async_messages(gen_ai_endpoint, version, environment, application_name,
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
                    kwargs.get("top_k", ""))
 span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                   finish_reason)
+                   [finish_reason])
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
                    prompt_tokens)
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
@@ -140,10 +140,18 @@ def async_messages(gen_ai_endpoint, version, environment, application_name,
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                    cost)
 if trace_content:
-    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                       prompt)
-    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                       llmresponse)
+    span.add_event(
+        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+        attributes={
+            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+        },
+    )
+    span.add_event(
+        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+        attributes={
+            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+        },
+    )

 span.set_status(Status(StatusCode.OK))

@@ -224,7 +232,7 @@ def async_messages(gen_ai_endpoint, version, environment, application_name,
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                    kwargs.get("model", "claude-3-sonnet-20240229"))
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                   kwargs.get("max_tokens", ""))
+                   kwargs.get("max_tokens", -1))
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                    False)
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
@@ -234,7 +242,7 @@ def async_messages(gen_ai_endpoint, version, environment, application_name,
 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
                    kwargs.get("top_k", ""))
 span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                   response.stop_reason)
+                   [response.stop_reason])
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
                    response.usage.input_tokens)
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
@@ -245,10 +253,19 @@ def async_messages(gen_ai_endpoint, version, environment, application_name,
 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                    cost)
 if trace_content:
-    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                       prompt)
-    # pylint: disable=line-too-long
-    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION, response.content[0].text if response.content else "")
+    span.add_event(
+        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+        attributes={
+            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+        },
+    )
+    span.add_event(
+        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+        attributes={
+            # pylint: disable=line-too-long
+            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.content[0].text if response.content else "",
+        },
+    )

 span.set_status(Status(StatusCode.OK))

@@ -6,9 +6,9 @@ import importlib.metadata
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from wrapt import wrap_function_wrapper

-from openlit.instrumentation.bedrock.bedrock import chat
+from openlit.instrumentation.bedrock.bedrock import converse

-_instruments = ("boto3 >= 1.34.93",)
+_instruments = ("boto3 >= 1.34.138",)

 class BedrockInstrumentor(BaseInstrumentor):
     """
@@ -32,7 +32,7 @@ class BedrockInstrumentor(BaseInstrumentor):
 wrap_function_wrapper(
     "botocore.client",
     "ClientCreator.create_client",
-    chat("bedrock.invoke_model", version, environment, application_name,
+    converse("bedrock.converse", version, environment, application_name,
          tracer, pricing_info, trace_content, metrics, disable_metrics),
 )

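Note: the instrumentor now hooks the Bedrock Converse API instead of `invoke_model`, which is why the boto3 floor rises to a release that ships the `converse` operation on `bedrock-runtime` clients. A hedged sketch of the call shape the wrapper intercepts; region and model ID are placeholders:

```python
import boto3

client = boto3.client("bedrock-runtime", region_name="us-east-1")
response = client.converse(
    modelId="amazon.titan-text-express-v1",
    messages=[{"role": "user", "content": [{"text": "Hello"}]}],
)
# The fields the new wrapper reads:
print(response["usage"]["inputTokens"],
      response["usage"]["outputTokens"],
      response["output"]["message"]["content"][0]["text"])
```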
@@ -4,15 +4,14 @@ Module for monitoring Amazon Bedrock API calls.
 """

 import logging
-import json
 from botocore.response import StreamingBody
 from botocore.exceptions import ReadTimeoutError, ResponseStreamingError
 from urllib3.exceptions import ProtocolError as URLLib3ProtocolError
 from urllib3.exceptions import ReadTimeoutError as URLLib3ReadTimeoutError
 from opentelemetry.trace import SpanKind, Status, StatusCode
 from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
-from openlit.__helpers import get_chat_model_cost, get_embed_model_cost, get_image_model_cost
-from openlit.__helpers import handle_exception, general_tokens
+from openlit.__helpers import get_chat_model_cost
+from openlit.__helpers import handle_exception
 from openlit.semcov import SemanticConvetion

 # Initialize logger for logging potential issues and operations
@@ -49,7 +48,7 @@ class CustomStreamWrapper(StreamingBody):
         return data_chunk


-def chat(gen_ai_endpoint, version, environment, application_name, tracer,
+def converse(gen_ai_endpoint, version, environment, application_name, tracer,
          pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for messages to collect metrics.
@@ -80,166 +79,23 @@ def chat(gen_ai_endpoint, version, environment, application_name, tracer,
     Returns:
         Response from the original method.
     """
-    def handle_image(span, model, request_body, response_body):
-        cost = 0
-        if "amazon" in model:
-            # pylint: disable=line-too-long
-            size = str(request_body.get("imageGenerationConfig", {}).get("width", 1024)) + "x" + str(request_body.get("imageGenerationConfig", {}).get("height", 1024))
-            quality = request_body.get("imageGenerationConfig", {}).get("quality", "standard")
-            n = request_body.get("imageGenerationConfig", {}).get("numberOfImages", 1)

-            span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
-                               size)
-            span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
-                               quality)
-            # Calculate cost of the operation
-            cost = n * get_image_model_cost(model,
-                                            pricing_info, size, quality)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                               cost)
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                   request_body.get("textToImageParams")["text"])
-
-        span.set_status(Status(StatusCode.OK))
-
-        if disable_metrics is False:
-            attributes = {
-                TELEMETRY_SDK_NAME:
-                    "openlit",
-                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                    application_name,
-                SemanticConvetion.GEN_AI_SYSTEM:
-                    SemanticConvetion.GEN_AI_SYSTEM_BEDROCK,
-                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                    environment,
-                SemanticConvetion.GEN_AI_TYPE:
-                    SemanticConvetion.GEN_AI_TYPE_IMAGE,
-                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                    model
-            }
-
-            metrics["genai_requests"].add(1, attributes)
-            metrics["genai_cost"].record(cost, attributes)
-
-    def handle_embed(span, model, request_body, response_body):
-        prompt_tokens, cost = 0, 0
-        if "amazon" in model:
-            prompt_tokens = response_body["inputTextTokenCount"]
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                               prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                               prompt_tokens)
-            # Calculate cost of the operation
-            cost = get_embed_model_cost(model,
-                                        pricing_info, prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                               cost)
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                   request_body["inputText"])
-
-        span.set_status(Status(StatusCode.OK))
-
-        if disable_metrics is False:
-            attributes = {
-                TELEMETRY_SDK_NAME:
-                    "openlit",
-                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                    application_name,
-                SemanticConvetion.GEN_AI_SYSTEM:
-                    SemanticConvetion.GEN_AI_SYSTEM_BEDROCK,
-                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                    environment,
-                SemanticConvetion.GEN_AI_TYPE:
-                    SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
-                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                    model
-            }
-
-            metrics["genai_requests"].add(1, attributes)
-            metrics["genai_total_tokens"].add(
-                prompt_tokens, attributes
-            )
-            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-            metrics["genai_cost"].record(cost, attributes)
-
-    def handle_chat(span, model, request_body, response_body):
-        prompt_tokens, completion_tokens, cost = 0, 0, 0
-
-        if "amazon" in model:
-            prompt_tokens = response_body["inputTextTokenCount"]
-            completion_tokens = response_body["results"][0]["tokenCount"]
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                               prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                               completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                               completion_tokens +
-                               prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                               response_body["results"][0]["completionReason"])
-
-            # Calculate cost of the operation
-            cost = get_chat_model_cost(model,
-                                       pricing_info, prompt_tokens,
-                                       completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                               cost)
-
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                   request_body["inputText"])
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                   response_body["results"][0]["outputText"])
-
-        elif "mistral" in model:
-            prompt_tokens = general_tokens(request_body["prompt"])
-            completion_tokens = general_tokens(response_body["outputs"][0]["text"])
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                               prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                               completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                               prompt_tokens + completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                               response_body["outputs"][0]["stop_reason"])
-            # Calculate cost of the operation
-            cost = get_chat_model_cost(model,
-                                       pricing_info, prompt_tokens,
-                                       completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                               cost)
-
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                   request_body["prompt"])
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                   response_body["outputs"][0]["text"])
-
-        elif "anthropic" in model:
-            prompt_tokens = response_body["usage"]["input_tokens"]
-            completion_tokens = response_body["usage"]["output_tokens"]
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                               prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                               completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                               completion_tokens +
-                               prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                               response_body["stop_reason"])
+    def converse_wrapper(original_method, *method_args, **method_kwargs):
+        """
+        Adds instrumentation to the invoke model call.

-            # Calculate cost of the operation
-            cost = get_chat_model_cost(model,
-                                       pricing_info, prompt_tokens,
-                                       completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                               cost)
+        Args:
+            original_method: The original invoke model method.
+            *method_args: Positional arguments for the method.
+            **method_kwargs: Keyword arguments for the method.
+        Returns:
+            The modified response with telemetry.
+        """
+        with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+            response = original_method(*method_args, **method_kwargs)

-            if trace_content:
-                # Format 'messages' into a single string
-                message_prompt = request_body["messages"]
+            try:
+                message_prompt = method_kwargs.get("messages", "")
                 formatted_messages = []
                 for message in message_prompt:
                     role = message["role"]
@@ -256,145 +112,10 @@ def chat(gen_ai_endpoint, version, environment, application_name, tracer,
                     else:
                         formatted_messages.append(f"{role}: {content}")
                 prompt = "\n".join(formatted_messages)
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                   prompt)
-
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                   response_body["content"][0]["text"])
-        elif "meta" in model:
-            prompt_tokens = response_body["prompt_token_count"]
-            completion_tokens = response_body["generation_token_count"]
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                               prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                               completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                               completion_tokens +
-                               prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                               response_body["stop_reason"])
-
-            # Calculate cost of the operation
-            cost = get_chat_model_cost(model,
-                                       pricing_info, prompt_tokens,
-                                       completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                               cost)
-
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                   request_body["prompt"])
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                   response_body["generation"])
-
-        elif "cohere" in model and "command-r" not in model:
-            prompt_tokens = general_tokens(request_body["prompt"])
-            completion_tokens = general_tokens(response_body["generations"][0]["text"])
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                               prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                               completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                               prompt_tokens + completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                               response_body["generations"][0]["finish_reason"])
-            # Calculate cost of the operation
-            cost = get_chat_model_cost(model,
-                                       pricing_info, prompt_tokens,
-                                       completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                               cost)
-
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                   request_body["prompt"])
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                   response_body["generations"][0]["text"])
-        elif "ai21" in model:
-            prompt_tokens = general_tokens(request_body["prompt"])
-            completion_tokens = general_tokens(response_body["completions"][0]["data"]["text"])
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                               prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                               completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                               prompt_tokens + completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                               response_body["completions"][0]["finishReason"]["reason"])
-            # Calculate cost of the operation
-            cost = get_chat_model_cost(model,
-                                       pricing_info, prompt_tokens,
-                                       completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                               cost)
-
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                   request_body["prompt"])
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                   response_body["completions"][0]["data"]["text"])
-
-        span.set_status(Status(StatusCode.OK))
-
-        if disable_metrics is False:
-            attributes = {
-                TELEMETRY_SDK_NAME:
-                    "openlit",
-                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                    application_name,
-                SemanticConvetion.GEN_AI_SYSTEM:
-                    SemanticConvetion.GEN_AI_SYSTEM_BEDROCK,
-                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                    environment,
-                SemanticConvetion.GEN_AI_TYPE:
-                    SemanticConvetion.GEN_AI_TYPE_CHAT,
-                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                    model
-            }
-
-            metrics["genai_requests"].add(1, attributes)
-            metrics["genai_total_tokens"].add(
-                prompt_tokens + completion_tokens, attributes
-            )
-            metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-            metrics["genai_cost"].record(cost, attributes)
-
-    def add_instrumentation(original_method, *method_args, **method_kwargs):
-        """
-        Adds instrumentation to the invoke model call.
-
-        Args:
-            original_method: The original invoke model method.
-            *method_args: Positional arguments for the method.
-            **method_kwargs: Keyword arguments for the method.
-        Returns:
-            The modified response with telemetry.
-        """
-        with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
-            response = original_method(*method_args, **method_kwargs)
-
-            try:
-                # Modify the response body to be reusable
-                response["body"] = CustomStreamWrapper(
-                    response["body"]._raw_stream, response["body"]._content_length
-                )
-                request_body = json.loads(method_kwargs.get("body"))
-                response_body = json.loads(response.get("body").read())

                 model = method_kwargs.get("modelId", "amazon.titan-text-express-v1")
-                if ("stability" in model or "image" in model) and "embed-image" not in model:
-                    generation = "image"
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_IMAGE)
-                elif "embed" in model and "embed-image" not in model:
-                    generation = "embeddings"
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
-                else:
-                    generation = "chat"
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_CHAT)
+                input_tokens = response["usage"]["inputTokens"]
+                output_tokens = response["usage"]["outputTokens"]

                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
@@ -407,12 +128,60 @@ def chat(gen_ai_endpoint, version, environment, application_name, tracer,
                                    application_name)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                                    model)
-                if generation == "chat":
-                    handle_chat(span, model, request_body, response_body)
-                elif generation == "embeddings":
-                    handle_embed(span, model, request_body, response_body)
-                elif generation == "image":
-                    handle_image(span, model, request_body, response_body)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                   input_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                   output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                   input_tokens + output_tokens)
+
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(model,
+                                           pricing_info, input_tokens,
+                                           output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                   cost)
+
+                if trace_content:
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            # pylint: disable=line-too-long
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response["output"]["message"]["content"][0]["text"],
+                        },
+                    )
+
+                span.set_status(Status(StatusCode.OK))
+
+                if disable_metrics is False:
+                    attributes = {
+                        TELEMETRY_SDK_NAME:
+                            "openlit",
+                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                            application_name,
+                        SemanticConvetion.GEN_AI_SYSTEM:
+                            SemanticConvetion.GEN_AI_SYSTEM_BEDROCK,
+                        SemanticConvetion.GEN_AI_ENVIRONMENT:
+                            environment,
+                        SemanticConvetion.GEN_AI_TYPE:
+                            SemanticConvetion.GEN_AI_TYPE_CHAT,
+                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                            model
+                    }
+
+                    metrics["genai_requests"].add(1, attributes)
+                    metrics["genai_total_tokens"].add(
+                        input_tokens + output_tokens, attributes
+                    )
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                    metrics["genai_cost"].record(cost, attributes)

             return response

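Note: the payoff of the Converse migration is visible above — token usage arrives structured in the response, so the per-provider `general_tokens` re-tokenization and `json.loads` body parsing could be deleted. A stubbed sketch of the fields the wrapper reads (`get_chat_model_cost` is openlit-internal, so the cost math is elided; values are made up):

```python
# Response shape taken from the hunk above; not a live Bedrock response.
response = {
    "usage": {"inputTokens": 120, "outputTokens": 80},
    "output": {"message": {"content": [{"text": "Hi there"}]}},
}

input_tokens = response["usage"]["inputTokens"]
output_tokens = response["usage"]["outputTokens"]
total_tokens = input_tokens + output_tokens  # recorded as total tokens
completion = response["output"]["message"]["content"][0]["text"]
print(total_tokens, completion)
```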
@@ -427,9 +196,10 @@ def chat(gen_ai_endpoint, version, environment, application_name, tracer,
         client = wrapped(*args, **kwargs)

         # Replace the original method with the instrumented one
-        original_invoke_model = client.invoke_model
-        client.invoke_model = lambda *args, **kwargs: add_instrumentation(original_invoke_model,
-                                                                          *args, **kwargs)
+        if kwargs.get("service_name") == "bedrock-runtime":
+            original_invoke_model = client.converse
+            client.converse = lambda *args, **kwargs: converse_wrapper(original_invoke_model,
+                                                                       *args, **kwargs)

         return client

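Note: instrumentation now attaches only when the created client is for the `bedrock-runtime` service, by rebinding `client.converse` to a lambda that closes over the original bound method. A self-contained illustration of that patching pattern with a toy client (not botocore):

```python
class ToyClient:                      # stand-in for a botocore client
    def converse(self, **kwargs):
        return {"usage": {"inputTokens": 3, "outputTokens": 5}}

def converse_wrapper(original_method, *args, **kwargs):
    response = original_method(*args, **kwargs)  # call through first
    print("traced:", response["usage"])          # telemetry would go here
    return response

client = ToyClient()
original = client.converse                       # capture the bound method
client.converse = lambda *a, **kw: converse_wrapper(original, *a, **kw)
client.converse(modelId="x")                     # now instrumented
```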