openlit 1.16.0__py3-none-any.whl → 1.16.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
openlit/instrumentation/anthropic/anthropic.py

@@ -120,7 +120,7 @@ def messages(gen_ai_endpoint, version, environment, application_name, tracer,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
         kwargs.get("model", "claude-3-sonnet-20240229"))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-        kwargs.get("max_tokens", ""))
+        kwargs.get("max_tokens", -1))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
         True)
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
@@ -224,7 +224,7 @@ def messages(gen_ai_endpoint, version, environment, application_name, tracer,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
         kwargs.get("model", "claude-3-sonnet-20240229"))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-        kwargs.get("max_tokens", ""))
+        kwargs.get("max_tokens", -1))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
         False)
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
openlit/instrumentation/anthropic/async_anthropic.py

@@ -120,7 +120,7 @@ def async_messages(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
         kwargs.get("model", "claude-3-sonnet-20240229"))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-        kwargs.get("max_tokens", ""))
+        kwargs.get("max_tokens", -1))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
         True)
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
@@ -224,7 +224,7 @@ def async_messages(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
         kwargs.get("model", "claude-3-sonnet-20240229"))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-        kwargs.get("max_tokens", ""))
+        kwargs.get("max_tokens", -1))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
         False)
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
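
Note on the recurring default-value changes in this release: OpenTelemetry attribute values are typed, and a key that is sometimes a string and sometimes an int is awkward for backends to index. Replacing the old "" fallback with the integer sentinel -1 (and, in later hunks, the int defaults 1/0 with the floats 1.0/0.0) keeps each attribute's type stable whether or not the caller passed the parameter. A minimal sketch of the pattern using the standard opentelemetry-api; the tracer and span names here are illustrative, not taken from this diff:

    # Minimal sketch: keep gen_ai.request.max_tokens integer-typed by using
    # -1 as the "not provided" sentinel instead of an empty string.
    from opentelemetry import trace

    tracer = trace.get_tracer("example")

    def record_request_params(span, kwargs):
        # Before 1.16.2 the fallback was "", so the attribute's type flipped
        # between str and int depending on the call site.
        span.set_attribute("gen_ai.request.max_tokens", kwargs.get("max_tokens", -1))
        # Float parameters get float defaults for the same reason (see the
        # groq/mistral/openai hunks below).
        span.set_attribute("gen_ai.request.temperature", kwargs.get("temperature", 1.0))

    with tracer.start_as_current_span("anthropic.messages") as span:
        record_request_params(span, {})                    # max_tokens -> -1
        record_request_params(span, {"max_tokens": 1024})  # max_tokens -> 1024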
openlit/instrumentation/bedrock/__init__.py

@@ -6,9 +6,9 @@ import importlib.metadata
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from wrapt import wrap_function_wrapper

-from openlit.instrumentation.bedrock.bedrock import chat
+from openlit.instrumentation.bedrock.bedrock import converse

-_instruments = ("boto3 >= 1.34.93",)
+_instruments = ("boto3 >= 1.34.138",)

 class BedrockInstrumentor(BaseInstrumentor):
     """
@@ -32,7 +32,7 @@ class BedrockInstrumentor(BaseInstrumentor):
         wrap_function_wrapper(
             "botocore.client",
             "ClientCreator.create_client",
-            chat("bedrock.invoke_model", version, environment, application_name,
+            converse("bedrock.converse", version, environment, application_name,
                  tracer, pricing_info, trace_content, metrics, disable_metrics),
         )

openlit/instrumentation/bedrock/bedrock.py

@@ -4,15 +4,14 @@ Module for monitoring Amazon Bedrock API calls.
 """

 import logging
-import json
 from botocore.response import StreamingBody
 from botocore.exceptions import ReadTimeoutError, ResponseStreamingError
 from urllib3.exceptions import ProtocolError as URLLib3ProtocolError
 from urllib3.exceptions import ReadTimeoutError as URLLib3ReadTimeoutError
 from opentelemetry.trace import SpanKind, Status, StatusCode
 from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
-from openlit.__helpers import get_chat_model_cost, get_embed_model_cost, get_image_model_cost
-from openlit.__helpers import handle_exception, general_tokens
+from openlit.__helpers import get_chat_model_cost
+from openlit.__helpers import handle_exception
 from openlit.semcov import SemanticConvetion

 # Initialize logger for logging potential issues and operations
@@ -49,7 +48,7 @@ class CustomStreamWrapper(StreamingBody):
         return data_chunk


-def chat(gen_ai_endpoint, version, environment, application_name, tracer,
+def converse(gen_ai_endpoint, version, environment, application_name, tracer,
         pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for messages to collect metrics.
@@ -80,166 +79,24 @@ def chat(gen_ai_endpoint, version, environment, application_name, tracer,
     Returns:
         Response from the original method.
     """
-    def handle_image(span, model, request_body, response_body):
-        cost = 0
-        if "amazon" in model:
-            # pylint: disable=line-too-long
-            size = str(request_body.get("imageGenerationConfig", {}).get("width", 1024)) + "x" + str(request_body.get("imageGenerationConfig", {}).get("height", 1024))
-            quality = request_body.get("imageGenerationConfig", {}).get("quality", "standard")
-            n = request_body.get("imageGenerationConfig", {}).get("numberOfImages", 1)

-            span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
-                size)
-            span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
-                quality)
-            # Calculate cost of the operation
-            cost = n * get_image_model_cost(model,
-                pricing_info, size, quality)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                cost)
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                    request_body.get("textToImageParams")["text"])
-
-        span.set_status(Status(StatusCode.OK))
-
-        if disable_metrics is False:
-            attributes = {
-                TELEMETRY_SDK_NAME:
-                    "openlit",
-                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                    application_name,
-                SemanticConvetion.GEN_AI_SYSTEM:
-                    SemanticConvetion.GEN_AI_SYSTEM_BEDROCK,
-                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                    environment,
-                SemanticConvetion.GEN_AI_TYPE:
-                    SemanticConvetion.GEN_AI_TYPE_IMAGE,
-                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                    model
-            }
-
-            metrics["genai_requests"].add(1, attributes)
-            metrics["genai_cost"].record(cost, attributes)
-
-    def handle_embed(span, model, request_body, response_body):
-        prompt_tokens, cost = 0, 0
-        if "amazon" in model:
-            prompt_tokens = response_body["inputTextTokenCount"]
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                prompt_tokens)
-            # Calculate cost of the operation
-            cost = get_embed_model_cost(model,
-                pricing_info, prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                cost)
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                    request_body["inputText"])
-
-        span.set_status(Status(StatusCode.OK))
-
-        if disable_metrics is False:
-            attributes = {
-                TELEMETRY_SDK_NAME:
-                    "openlit",
-                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                    application_name,
-                SemanticConvetion.GEN_AI_SYSTEM:
-                    SemanticConvetion.GEN_AI_SYSTEM_BEDROCK,
-                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                    environment,
-                SemanticConvetion.GEN_AI_TYPE:
-                    SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
-                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                    model
-            }
-
-            metrics["genai_requests"].add(1, attributes)
-            metrics["genai_total_tokens"].add(
-                prompt_tokens, attributes
-            )
-            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-            metrics["genai_cost"].record(cost, attributes)
-
-    def handle_chat(span, model, request_body, response_body):
-        prompt_tokens, completion_tokens, cost = 0, 0, 0
-
-        if "amazon" in model:
-            prompt_tokens = response_body["inputTextTokenCount"]
-            completion_tokens = response_body["results"][0]["tokenCount"]
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                completion_tokens +
-                prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                response_body["results"][0]["completionReason"])
-
-            # Calculate cost of the operation
-            cost = get_chat_model_cost(model,
-                pricing_info, prompt_tokens,
-                completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                cost)
-
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                    request_body["inputText"])
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                    response_body["results"][0]["outputText"])
-
-        elif "mistral" in model:
-            prompt_tokens = general_tokens(request_body["prompt"])
-            completion_tokens = general_tokens(response_body["outputs"][0]["text"])
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                prompt_tokens + completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                response_body["outputs"][0]["stop_reason"])
-            # Calculate cost of the operation
-            cost = get_chat_model_cost(model,
-                pricing_info, prompt_tokens,
-                completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                cost)
-
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                    request_body["prompt"])
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                    response_body["outputs"][0]["text"])
-
-        elif "anthropic" in model:
-            prompt_tokens = response_body["usage"]["input_tokens"]
-            completion_tokens = response_body["usage"]["output_tokens"]
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                completion_tokens +
-                prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                response_body["stop_reason"])
+    def converse_wrapper(original_method, *method_args, **method_kwargs):
+        """
+        Adds instrumentation to the invoke model call.

-            # Calculate cost of the operation
-            cost = get_chat_model_cost(model,
-                pricing_info, prompt_tokens,
-                completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                cost)
+        Args:
+            original_method: The original invoke model method.
+            *method_args: Positional arguments for the method.
+            **method_kwargs: Keyword arguments for the method.
+        Returns:
+            The modified response with telemetry.
+        """
+        with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+            response = original_method(*method_args, **method_kwargs)

-            if trace_content:
-                # Format 'messages' into a single string
-                message_prompt = request_body["messages"]
+            try:
+                message_prompt = method_kwargs.get("messages", "")
+                print(message_prompt)
                 formatted_messages = []
                 for message in message_prompt:
                     role = message["role"]
@@ -256,145 +113,10 @@ def chat(gen_ai_endpoint, version, environment, application_name, tracer,
                     else:
                         formatted_messages.append(f"{role}: {content}")
                 prompt = "\n".join(formatted_messages)
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                    prompt)
-
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                    response_body["content"][0]["text"])
-        elif "meta" in model:
-            prompt_tokens = response_body["prompt_token_count"]
-            completion_tokens = response_body["generation_token_count"]
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                completion_tokens +
-                prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                response_body["stop_reason"])
-
-            # Calculate cost of the operation
-            cost = get_chat_model_cost(model,
-                pricing_info, prompt_tokens,
-                completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                cost)
-
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                    request_body["prompt"])
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                    response_body["generation"])
-
-        elif "cohere" in model and "command-r" not in model:
-            prompt_tokens = general_tokens(request_body["prompt"])
-            completion_tokens = general_tokens(response_body["generations"][0]["text"])
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                prompt_tokens + completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                response_body["generations"][0]["finish_reason"])
-            # Calculate cost of the operation
-            cost = get_chat_model_cost(model,
-                pricing_info, prompt_tokens,
-                completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                cost)
-
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                    request_body["prompt"])
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                    response_body["generations"][0]["text"])
-        elif "ai21" in model:
-            prompt_tokens = general_tokens(request_body["prompt"])
-            completion_tokens = general_tokens(response_body["completions"][0]["data"]["text"])
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                prompt_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                prompt_tokens + completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                response_body["completions"][0]["finishReason"]["reason"])
-            # Calculate cost of the operation
-            cost = get_chat_model_cost(model,
-                pricing_info, prompt_tokens,
-                completion_tokens)
-            span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                cost)
-
-            if trace_content:
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                    request_body["prompt"])
-                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                    response_body["completions"][0]["data"]["text"])
-
-        span.set_status(Status(StatusCode.OK))
-
-        if disable_metrics is False:
-            attributes = {
-                TELEMETRY_SDK_NAME:
-                    "openlit",
-                SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                    application_name,
-                SemanticConvetion.GEN_AI_SYSTEM:
-                    SemanticConvetion.GEN_AI_SYSTEM_BEDROCK,
-                SemanticConvetion.GEN_AI_ENVIRONMENT:
-                    environment,
-                SemanticConvetion.GEN_AI_TYPE:
-                    SemanticConvetion.GEN_AI_TYPE_CHAT,
-                SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                    model
-            }
-
-            metrics["genai_requests"].add(1, attributes)
-            metrics["genai_total_tokens"].add(
-                prompt_tokens + completion_tokens, attributes
-            )
-            metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-            metrics["genai_cost"].record(cost, attributes)
-
-    def add_instrumentation(original_method, *method_args, **method_kwargs):
-        """
-        Adds instrumentation to the invoke model call.
-
-        Args:
-            original_method: The original invoke model method.
-            *method_args: Positional arguments for the method.
-            **method_kwargs: Keyword arguments for the method.
-        Returns:
-            The modified response with telemetry.
-        """
-        with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
-            response = original_method(*method_args, **method_kwargs)
-
-            try:
-                # Modify the response body to be reusable
-                response["body"] = CustomStreamWrapper(
-                    response["body"]._raw_stream, response["body"]._content_length
-                )
-                request_body = json.loads(method_kwargs.get("body"))
-                response_body = json.loads(response.get("body").read())

                 model = method_kwargs.get("modelId", "amazon.titan-text-express-v1")
-                if ("stability" in model or "image" in model) and "embed-image" not in model:
-                    generation = "image"
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                        SemanticConvetion.GEN_AI_TYPE_IMAGE)
-                elif "embed" in model and "embed-image" not in model:
-                    generation = "embeddings"
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                        SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
-                else:
-                    generation = "chat"
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                        SemanticConvetion.GEN_AI_TYPE_CHAT)
+                input_tokens = response["usage"]["inputTokens"]
+                output_tokens = response["usage"]["outputTokens"]

                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
@@ -407,12 +129,51 @@ def chat(gen_ai_endpoint, version, environment, application_name, tracer,
                     application_name)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                     model)
-                if generation == "chat":
-                    handle_chat(span, model, request_body, response_body)
-                elif generation == "embeddings":
-                    handle_embed(span, model, request_body, response_body)
-                elif generation == "image":
-                    handle_image(span, model, request_body, response_body)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                    input_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                    output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                    input_tokens + output_tokens)
+
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(model,
+                    pricing_info, input_tokens,
+                    output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                    cost)
+
+                if trace_content:
+                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                        prompt)
+                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                        response["output"]["message"]["content"][0]["text"])
+
+                span.set_status(Status(StatusCode.OK))
+
+                if disable_metrics is False:
+                    attributes = {
+                        TELEMETRY_SDK_NAME:
+                            "openlit",
+                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                            application_name,
+                        SemanticConvetion.GEN_AI_SYSTEM:
+                            SemanticConvetion.GEN_AI_SYSTEM_BEDROCK,
+                        SemanticConvetion.GEN_AI_ENVIRONMENT:
+                            environment,
+                        SemanticConvetion.GEN_AI_TYPE:
+                            SemanticConvetion.GEN_AI_TYPE_CHAT,
+                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                            model
+                    }
+
+                    metrics["genai_requests"].add(1, attributes)
+                    metrics["genai_total_tokens"].add(
+                        input_tokens + output_tokens, attributes
+                    )
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                    metrics["genai_cost"].record(cost, attributes)

             return response

@@ -427,9 +188,10 @@ def chat(gen_ai_endpoint, version, environment, application_name, tracer,
         client = wrapped(*args, **kwargs)

         # Replace the original method with the instrumented one
-        original_invoke_model = client.invoke_model
-        client.invoke_model = lambda *args, **kwargs: add_instrumentation(original_invoke_model,
-            *args, **kwargs)
+        if kwargs.get("service_name") == "bedrock-runtime":
+            original_invoke_model = client.converse
+            client.converse = lambda *args, **kwargs: converse_wrapper(original_invoke_model,
+                *args, **kwargs)

         return client

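Taken together, the bedrock changes replace the old invoke_model JSON-body parsing (with its per-vendor handle_chat/handle_embed/handle_image branches) with the uniform Converse API, whose responses always carry usage.inputTokens and usage.outputTokens. A minimal usage sketch, under stated assumptions: AWS credentials and region are configured, openlit.init has an exporter available, and the message payload follows the Bedrock Converse request shape (none of this appears in the diff itself):

    # Illustrative only: exercising the patched client. openlit wraps
    # botocore's ClientCreator.create_client and, for service_name
    # "bedrock-runtime", replaces client.converse with converse_wrapper.
    import boto3
    import openlit

    openlit.init(application_name="bedrock-demo")  # exporter settings assumed

    client = boto3.client("bedrock-runtime")  # region/credentials assumed
    response = client.converse(
        modelId="amazon.titan-text-express-v1",
        messages=[{"role": "user", "content": [{"text": "Hello!"}]}],
    )
    # The wrapper reads response["usage"]["inputTokens"/"outputTokens"] for
    # the gen_ai.usage.* attributes and cost, then returns response unchanged.
    print(response["output"]["message"]["content"][0]["text"])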
openlit/instrumentation/cohere/cohere.py

@@ -193,7 +193,7 @@ def chat(gen_ai_endpoint, version, environment, application_name, tracer,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
         kwargs.get("temperature", 0.3))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-        kwargs.get("max_tokens", ""))
+        kwargs.get("max_tokens", -1))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
@@ -336,7 +336,7 @@ def chat_stream(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
         kwargs.get("temperature", 0.3))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-        kwargs.get("max_tokens", ""))
+        kwargs.get("max_tokens", -1))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
openlit/instrumentation/groq/async_groq.py

@@ -119,15 +119,15 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
         kwargs.get("user", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-        kwargs.get("top_p", 1))
+        kwargs.get("top_p", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-        kwargs.get("max_tokens", ""))
+        kwargs.get("max_tokens", -1))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-        kwargs.get("temperature", 1))
+        kwargs.get("temperature", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-        kwargs.get("presence_penalty", 0))
+        kwargs.get("presence_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-        kwargs.get("frequency_penalty", 0))
+        kwargs.get("frequency_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
@@ -221,17 +221,17 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
         kwargs.get("model", "llama3-8b-8192"))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-        kwargs.get("top_p", 1))
+        kwargs.get("top_p", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-        kwargs.get("max_tokens", ""))
+        kwargs.get("max_tokens", -1))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
         kwargs.get("name", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-        kwargs.get("temperature", 1))
+        kwargs.get("temperature", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-        kwargs.get("presence_penalty", 0))
+        kwargs.get("presence_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-        kwargs.get("frequency_penalty", 0))
+        kwargs.get("frequency_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
openlit/instrumentation/groq/groq.py

@@ -119,15 +119,15 @@ def chat(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
         kwargs.get("user", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-        kwargs.get("top_p", 1))
+        kwargs.get("top_p", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-        kwargs.get("max_tokens", ""))
+        kwargs.get("max_tokens", -1))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-        kwargs.get("temperature", 1))
+        kwargs.get("temperature", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-        kwargs.get("presence_penalty", 0))
+        kwargs.get("presence_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-        kwargs.get("frequency_penalty", 0))
+        kwargs.get("frequency_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
@@ -221,17 +221,17 @@ def chat(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
         kwargs.get("model", "llama3-8b-8192"))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-        kwargs.get("top_p", 1))
+        kwargs.get("top_p", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-        kwargs.get("max_tokens", ""))
+        kwargs.get("max_tokens", -1))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
         kwargs.get("name", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-        kwargs.get("temperature", 1))
+        kwargs.get("temperature", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-        kwargs.get("presence_penalty", 0))
+        kwargs.get("presence_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-        kwargs.get("frequency_penalty", 0))
+        kwargs.get("frequency_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
openlit/instrumentation/mistral/async_mistral.py

@@ -95,9 +95,9 @@ def async_chat(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
         kwargs.get("temperature", 0.7))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-        kwargs.get("top_p", 1))
+        kwargs.get("top_p", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-        kwargs.get("max_tokens", ""))
+        kwargs.get("max_tokens", -1))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("random_seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
@@ -252,9 +252,9 @@ def async_chat_stream(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
         kwargs.get("temperature", 0.7))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-        kwargs.get("top_p", 1))
+        kwargs.get("top_p", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-        kwargs.get("max_tokens", ""))
+        kwargs.get("max_tokens", -1))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("random_seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
openlit/instrumentation/mistral/mistral.py

@@ -94,9 +94,9 @@ def chat(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
         kwargs.get("temperature", 0.7))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-        kwargs.get("top_p", 1))
+        kwargs.get("top_p", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-        kwargs.get("max_tokens", ""))
+        kwargs.get("max_tokens", -1))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("random_seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
@@ -251,9 +251,9 @@ def chat_stream(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
         kwargs.get("temperature", 0.7))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-        kwargs.get("top_p", 1))
+        kwargs.get("top_p", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-        kwargs.get("max_tokens", ""))
+        kwargs.get("max_tokens", -1))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("random_seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
openlit/instrumentation/openai/async_azure_openai.py

@@ -125,11 +125,11 @@ def azure_async_chat_completions(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOOL_CHOICE,
         kwargs.get("tool_choice", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-        kwargs.get("temperature", 1))
+        kwargs.get("temperature", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-        kwargs.get("presence_penalty", 0))
+        kwargs.get("presence_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-        kwargs.get("frequency_penalty", 0))
+        kwargs.get("frequency_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
@@ -228,11 +228,11 @@ def azure_async_chat_completions(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOOL_CHOICE,
         kwargs.get("tool_choice", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-        kwargs.get("temperature", 1))
+        kwargs.get("temperature", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-        kwargs.get("presence_penalty", 0))
+        kwargs.get("presence_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-        kwargs.get("frequency_penalty", 0))
+        kwargs.get("frequency_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
@@ -425,11 +425,11 @@ def azure_async_completions(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOOL_CHOICE,
         kwargs.get("tool_choice", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-        kwargs.get("temperature", 1))
+        kwargs.get("temperature", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-        kwargs.get("presence_penalty", 0))
+        kwargs.get("presence_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-        kwargs.get("frequency_penalty", 0))
+        kwargs.get("frequency_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
@@ -509,11 +509,11 @@ def azure_async_completions(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOOL_CHOICE,
         kwargs.get("tool_choice", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-        kwargs.get("temperature", 1))
+        kwargs.get("temperature", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-        kwargs.get("presence_penalty", 0))
+        kwargs.get("presence_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-        kwargs.get("frequency_penalty", 0))
+        kwargs.get("frequency_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
openlit/instrumentation/openai/async_openai.py

@@ -122,15 +122,15 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
         kwargs.get("user", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-        kwargs.get("top_p", 1))
+        kwargs.get("top_p", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-        kwargs.get("max_tokens", ""))
+        kwargs.get("max_tokens", -1))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-        kwargs.get("temperature", 1))
+        kwargs.get("temperature", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-        kwargs.get("presence_penalty", 0))
+        kwargs.get("presence_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-        kwargs.get("frequency_penalty", 0))
+        kwargs.get("frequency_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
@@ -224,17 +224,17 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
         kwargs.get("model", "gpt-3.5-turbo"))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-        kwargs.get("top_p", 1))
+        kwargs.get("top_p", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-        kwargs.get("max_tokens", ""))
+        kwargs.get("max_tokens", -1))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
         kwargs.get("user", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-        kwargs.get("temperature", 1))
+        kwargs.get("temperature", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-        kwargs.get("presence_penalty", 0))
+        kwargs.get("presence_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-        kwargs.get("frequency_penalty", 0))
+        kwargs.get("frequency_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
openlit/instrumentation/openai/azure_openai.py

@@ -125,11 +125,11 @@ def azure_chat_completions(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOOL_CHOICE,
         kwargs.get("tool_choice", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-        kwargs.get("temperature", 1))
+        kwargs.get("temperature", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-        kwargs.get("presence_penalty", 0))
+        kwargs.get("presence_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-        kwargs.get("frequency_penalty", 0))
+        kwargs.get("frequency_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
@@ -228,11 +228,11 @@ def azure_chat_completions(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOOL_CHOICE,
         kwargs.get("tool_choice", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-        kwargs.get("temperature", 1))
+        kwargs.get("temperature", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-        kwargs.get("presence_penalty", 0))
+        kwargs.get("presence_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-        kwargs.get("frequency_penalty", 0))
+        kwargs.get("frequency_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
@@ -423,11 +423,11 @@ def azure_completions(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOOL_CHOICE,
         kwargs.get("tool_choice", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-        kwargs.get("temperature", 1))
+        kwargs.get("temperature", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-        kwargs.get("presence_penalty", 0))
+        kwargs.get("presence_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-        kwargs.get("frequency_penalty", 0))
+        kwargs.get("frequency_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
@@ -507,11 +507,11 @@ def azure_completions(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOOL_CHOICE,
         kwargs.get("tool_choice", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-        kwargs.get("temperature", 1))
+        kwargs.get("temperature", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-        kwargs.get("presence_penalty", 0))
+        kwargs.get("presence_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-        kwargs.get("frequency_penalty", 0))
+        kwargs.get("frequency_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
openlit/instrumentation/openai/openai.py

@@ -123,15 +123,15 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
         kwargs.get("user", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-        kwargs.get("top_p", 1))
+        kwargs.get("top_p", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-        kwargs.get("max_tokens", ""))
+        kwargs.get("max_tokens", -1))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-        kwargs.get("temperature", 1))
+        kwargs.get("temperature", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-        kwargs.get("presence_penalty", 0))
+        kwargs.get("presence_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-        kwargs.get("frequency_penalty", 0))
+        kwargs.get("frequency_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
@@ -225,17 +225,17 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
         kwargs.get("model", "gpt-3.5-turbo"))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-        kwargs.get("top_p", 1))
+        kwargs.get("top_p", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-        kwargs.get("max_tokens", ""))
+        kwargs.get("max_tokens", -1))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
         kwargs.get("user", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-        kwargs.get("temperature", 1))
+        kwargs.get("temperature", 1.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-        kwargs.get("presence_penalty", 0))
+        kwargs.get("presence_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-        kwargs.get("frequency_penalty", 0))
+        kwargs.get("frequency_penalty", 0.0))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
         kwargs.get("seed", ""))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
openlit/instrumentation/transformers/transformers.py

@@ -87,7 +87,7 @@ def text_wrap(gen_ai_endpoint, version, environment, application_name,
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
         forward_params.get("top_p", "null"))
     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-        forward_params.get("max_length", "null"))
+        forward_params.get("max_length", -1))
     span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
         prompt)
     if trace_content:
openlit/semcov/__init__.py

@@ -59,14 +59,14 @@ class SemanticConvetion:
     GEN_AI_REQUEST_IMAGE_STYLE = "gen_ai.request.image_style"

     # GenAI Usage
-    GEN_AI_USAGE_PROMPT_TOKENS = "gen_ai.usage.prompt_tokens"
-    GEN_AI_USAGE_COMPLETION_TOKENS = "gen_ai.usage.completion_tokens"
+    GEN_AI_USAGE_PROMPT_TOKENS = "gen_ai.usage.input_tokens"
+    GEN_AI_USAGE_COMPLETION_TOKENS = "gen_ai.usage.output_tokens"
     GEN_AI_USAGE_TOTAL_TOKENS = "gen_ai.usage.total_tokens"
     GEN_AI_USAGE_COST = "gen_ai.usage.cost"

     # GenAI Response
     GEN_AI_RESPONSE_ID = "gen_ai.response.id"
-    GEN_AI_RESPONSE_FINISH_REASON = "gen_ai.response.finish_reason"
+    GEN_AI_RESPONSE_FINISH_REASON = "gen_ai.response.finish_reasons"
     GEN_AI_RESPONSE_IMAGE = "gen_ai.response.image" # Not used directly in code yet

     # GenAI Content
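
The semcov change means spans emitted by 1.16.2 report token usage under gen_ai.usage.input_tokens / gen_ai.usage.output_tokens (matching the OpenTelemetry GenAI semantic conventions) and the finish reason under the plural key gen_ai.response.finish_reasons, even though the Python constant names are unchanged. A hypothetical migration helper for downstream queries or processors, not part of openlit:

    # Hypothetical helper: rename 1.16.0 attribute keys to their 1.16.2
    # equivalents, e.g. when rewriting stored span attributes or dashboards.
    RENAMED_KEYS = {
        "gen_ai.usage.prompt_tokens": "gen_ai.usage.input_tokens",
        "gen_ai.usage.completion_tokens": "gen_ai.usage.output_tokens",
        "gen_ai.response.finish_reason": "gen_ai.response.finish_reasons",
    }

    def migrate(attributes: dict) -> dict:
        """Return a copy of span attributes with renamed keys applied."""
        return {RENAMED_KEYS.get(k, k): v for k, v in attributes.items()}

    assert migrate({"gen_ai.usage.prompt_tokens": 42}) == {"gen_ai.usage.input_tokens": 42}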
openlit-1.16.2.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: openlit
-Version: 1.16.0
+Version: 1.16.2
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications, facilitating the integration of observability into your GenAI-driven projects
 Home-page: https://github.com/openlit/openlit/tree/main/openlit/python
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT
@@ -21,7 +21,7 @@ Requires-Dist: opentelemetry-instrumentation (>=0.45b0,<0.46)
 Requires-Dist: opentelemetry-sdk (>=1.24.0,<2.0.0)
 Requires-Dist: requests (>=2.26.0,<3.0.0)
 Requires-Dist: schedule (>=1.2.2,<2.0.0)
-Requires-Dist: tiktoken (>=0.6.0,<0.7.0)
+Requires-Dist: tiktoken (>=0.7.0,<0.8.0)
 Project-URL: Repository, https://github.com/openlit/openlit/tree/main/openlit/python
 Description-Content-Type: text/markdown

openlit-1.16.2.dist-info/RECORD

@@ -1,14 +1,14 @@
 openlit/__helpers.py,sha256=lrn4PBs9owDudiCY2NBoVbAi7AU_HtUpyOj0oqPBsPY,5545
 openlit/__init__.py,sha256=eJKH1Op7wzBsuoBYuM_C022Jo7cCtRQBJxf2lpDfe_o,14981
 openlit/instrumentation/anthropic/__init__.py,sha256=oaU53BOPyfUKbEzYvLr1DPymDluurSnwo4Hernf2XdU,1955
-openlit/instrumentation/anthropic/anthropic.py,sha256=CYBui5eEfWdSfFF0xtCQjh1xO-gCVJc_V9Hli0szVZE,16026
-openlit/instrumentation/anthropic/async_anthropic.py,sha256=NW84kTQ3BkUx1zZuMRps_J7zTYkmq5BxOrqSjqWInBs,16068
-openlit/instrumentation/bedrock/__init__.py,sha256=QPvDMQde6Meodu5JvosHdZsnyExS19lcoP5Li4YrOkw,1540
-openlit/instrumentation/bedrock/bedrock.py,sha256=SsN1SFWFn7P84Z6irH_8OLY2mOctWsBG82f-cnroOhU,22276
+openlit/instrumentation/anthropic/anthropic.py,sha256=AkQUmi_VtDzFKOMobQw6LTw_CwC27E0r_--7FnwCJ3A,16026
+openlit/instrumentation/anthropic/async_anthropic.py,sha256=l-AjpkxllWAXTlv9rOm61Ktbei3csvWQPFWw7FvI6Cg,16068
+openlit/instrumentation/bedrock/__init__.py,sha256=DLLYio4S4gUzRElqNRT8WMKzM79HZwOBVjXfJI4BfaA,1545
+openlit/instrumentation/bedrock/bedrock.py,sha256=CtT_Ze1RxJjv710o_0uzTqZhgtW6gXkzV7G-v7rYYxo,9138
 openlit/instrumentation/chroma/__init__.py,sha256=61lFpHlUEQUobsUJZHXdvOViKwsOH8AOvSfc4VgCmiM,3253
 openlit/instrumentation/chroma/chroma.py,sha256=E80j_41UeZi8RzTsHbpvi1izOA_n-0-3_VdrA68AJPA,10531
 openlit/instrumentation/cohere/__init__.py,sha256=PC5T1qIg9pwLNocBP_WjG5B_6p_z019s8quk_fNLAMs,1920
-openlit/instrumentation/cohere/cohere.py,sha256=GvxIp55TJIu4YyG0_FwLBDHvAMUlAXyvMNIFhl2CQP4,20437
+openlit/instrumentation/cohere/cohere.py,sha256=_FXytRRfRuHwNK-PME_X9LZIQjQ0Uq7QQa1Vq_y8NNY,20437
 openlit/instrumentation/elevenlabs/__init__.py,sha256=BZjAe-kzFJpKxT0tKksXVfZgirvgEp8qM3SfegWU5co,2631
 openlit/instrumentation/elevenlabs/async_elevenlabs.py,sha256=aDbSV5rXx-ZpBMea5DLERQDGW7uoegLMszhy-x3A1lw,5543
 openlit/instrumentation/elevenlabs/elevenlabs.py,sha256=AbMThG8edI778Dv85jtdUY2YkXD6s5auozXxH03iTvY,5942
@@ -18,8 +18,8 @@ openlit/instrumentation/gpt4all/__init__.py,sha256=-59CP2B3-HGZJ_vC-fI9Dt-0BuQXR
 openlit/instrumentation/gpt4all/gpt4all.py,sha256=iDu8CAat4j5VPAlhIdkGOclZvhFPG-u7zKwadsKeJps,17948
 openlit/instrumentation/gpu/__init__.py,sha256=Dj2MLar0DB20-t6W3pfR-3jfR_mwg4SYwhzIrH_n9sU,5596
 openlit/instrumentation/groq/__init__.py,sha256=uW_0G6HSanQyK2dIXYhzR604pDiyPQfybzc37DsfSew,1911
-openlit/instrumentation/groq/async_groq.py,sha256=aOwgoUrEqIgLSlnAtJnaGIF8T_LUlpTnOzPNBIUwez4,19076
-openlit/instrumentation/groq/groq.py,sha256=iMh4TPwBEJ7Eg6Gi4x6KYpELtQKDXIsgLrh6kQHVkHc,19040
+openlit/instrumentation/groq/async_groq.py,sha256=AiKx_f4wuJqiiI6hhu7qbKsOXOWzLug3R0QMkZHfC10,19092
+openlit/instrumentation/groq/groq.py,sha256=jJVGpc5DlQ5xD7FgDaG20pV3qfFzbdpjUf6LdULpaJg,19056
 openlit/instrumentation/haystack/__init__.py,sha256=QK6XxxZUHX8vMv2Crk7rNBOc64iOOBLhJGL_lPlAZ8s,1758
 openlit/instrumentation/haystack/haystack.py,sha256=oQIZiDhdp3gnJnhYQ1OouJMc9YT0pQ-_31cmNuopa68,3891
 openlit/instrumentation/langchain/__init__.py,sha256=19C7YGSF-6u5VlvKkThNS4zZqvxw-fQfRsKufZ9onfk,2881
@@ -29,29 +29,29 @@ openlit/instrumentation/llamaindex/llamaindex.py,sha256=uiIigbwhonSbJWA7LpgOVI1R
 openlit/instrumentation/milvus/__init__.py,sha256=qi1yfmMrvkDtnrN_6toW8qC9BRL78bq7ayWpObJ8Bq4,2961
 openlit/instrumentation/milvus/milvus.py,sha256=qhKIoggBAJhRctRrBYz69AcvXH-eh7oBn_l9WfxpAjI,9121
 openlit/instrumentation/mistral/__init__.py,sha256=zJCIpFWRbsYrvooOJYuqwyuKeSOQLWbyXWCObL-Snks,3156
-openlit/instrumentation/mistral/async_mistral.py,sha256=PXpiLwkonTtAPVOUh9pXMSYeabwH0GFG_HRDWrEKhMM,21361
-openlit/instrumentation/mistral/mistral.py,sha256=nbAyMlPiuA9hihePkM_nnxAjahZSndT-B-qXRO5VIhk,21212
+openlit/instrumentation/mistral/async_mistral.py,sha256=uv5P5ow6b78QWJidIXY3Sl6X8re09ITtLRdji2L97Dw,21365
+openlit/instrumentation/mistral/mistral.py,sha256=-uLlPPl3U3670DWUBetLkoYvT83eJlHPnLxXjr5qI7M,21216
 openlit/instrumentation/ollama/__init__.py,sha256=cOax8PiypDuo_FC4WvDCYBRo7lH5nV9xU92h7k-eZbg,3812
 openlit/instrumentation/ollama/async_ollama.py,sha256=ESk1zZTj2hPmkWIH5F2owuoo0apleDSSx5VORlO3e3w,28991
 openlit/instrumentation/ollama/ollama.py,sha256=PLGF9RB3TRNZ9GSGqeGVvKFBtgUK8Hc8xwvk-3NPeGI,28901
 openlit/instrumentation/openai/__init__.py,sha256=AZ2cPr3TMKkgGdMl_yXMeSi7bWhtmMqOW1iHdzHHGHA,16265
-openlit/instrumentation/openai/async_azure_openai.py,sha256=e_Tw85tMhKR11jifWUK4PgqABUinfkH5Bs6eANc0xBE,46278
-openlit/instrumentation/openai/async_openai.py,sha256=f7FJfs996Rk7qZEZvaZ1YeRTBrDwjZW94QKtx9vmIck,45828
-openlit/instrumentation/openai/azure_openai.py,sha256=R4It9gRaoBav7JUKjarJBIywbr2j_BAF6MkvCr9EP64,46072
-openlit/instrumentation/openai/openai.py,sha256=7Dq7EEQH5GjIExj2f_A_DSZYixh3PxxJ54UqSjPCP8c,46509
+openlit/instrumentation/openai/async_azure_openai.py,sha256=Y0HIFwCZ6EAIQ8DgwSkVvDSGd53oohWla00T6tw3BrQ,46302
+openlit/instrumentation/openai/async_openai.py,sha256=mzMz2j6hYK-mxIgI2fSEoYUPAZroHkv_6pTBI1fRu2c,45844
+openlit/instrumentation/openai/azure_openai.py,sha256=XUf5eLR1_ggpoWSC46vTBkKXiyNd-_fRxx70XGCHj2M,46096
+openlit/instrumentation/openai/openai.py,sha256=TWr0U6bZrgurrm5rM7EUgmEk-L_pxIGd_LPTq2K9SUE,46525
 openlit/instrumentation/pinecone/__init__.py,sha256=Mv9bElqNs07_JQkYyNnO0wOM3hdbprmw7sttdMeKC7g,2526
 openlit/instrumentation/pinecone/pinecone.py,sha256=0EhLmtOuvwWVvAKh3e56wyd8wzQq1oaLOmF15SVHxVE,8765
 openlit/instrumentation/qdrant/__init__.py,sha256=OJIg17-IGmBEvBYVKjCHcJ0hFXuEL7XV_jzUTqkolN8,4799
 openlit/instrumentation/qdrant/qdrant.py,sha256=4uHKYGvWQtRAEVLUWo3o4joJw7hFm2NxVuBu5YKZKiI,14456
 openlit/instrumentation/transformers/__init__.py,sha256=4GBtjzcJU4XiPexIUYEqF3pNZMeQw4Gm5B-cyumaFjs,1468
-openlit/instrumentation/transformers/transformers.py,sha256=C4lappTUaRZ818jK8PqFXcLd8uMqh0LbXRiXuJYzJPk,7608
+openlit/instrumentation/transformers/transformers.py,sha256=HCpG-gC5W9F2ekbol3HsuNjXb4jrM_D7YLtHDlV4STc,7604
 openlit/instrumentation/vertexai/__init__.py,sha256=N3E9HtzefD-zC0fvmfGYiDmSqssoavp_i59wfuYLyMw,6079
 openlit/instrumentation/vertexai/async_vertexai.py,sha256=PMHYyLf1J4gZpC_-KZ_ZVx1xIHhZDJSNa7mrjNXZ5M0,52372
 openlit/instrumentation/vertexai/vertexai.py,sha256=UvpNKBHPoV9idVMfGigZnmWuEQiyqSwZn0zK9-U7Lzw,52125
 openlit/otel/metrics.py,sha256=O7NoaDz0bY19mqpE4-0PcKwEe-B-iJFRgOCaanAuZAc,4291
 openlit/otel/tracing.py,sha256=vL1ifMbARPBpqK--yXYsCM6y5dSu5LFIKqkhZXtYmUc,3712
-openlit/semcov/__init__.py,sha256=Z83zteHGuj5WrYShnDky5l8AMy3L8Okua7nD10eI2Bs,7345
-openlit-1.16.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-openlit-1.16.0.dist-info/METADATA,sha256=bF8GCqcgLUy0fTqY1o-1mDytjMzElIyRzilxZEJSDDQ,14120
-openlit-1.16.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-openlit-1.16.0.dist-info/RECORD,,
+openlit/semcov/__init__.py,sha256=KIKPDAXA29wu6XmHEfDprvlbvf83FJaprsCIbfChfBs,7341
+openlit-1.16.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+openlit-1.16.2.dist-info/METADATA,sha256=fM-cjlEUgxcGy1D0mbWk8fsxryBOE8TLIELImwjAvIg,14120
+openlit-1.16.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+openlit-1.16.2.dist-info/RECORD,,