openlit 1.33.8__py3-none-any.whl → 1.33.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. openlit/__helpers.py +83 -0
  2. openlit/__init__.py +1 -1
  3. openlit/instrumentation/ag2/ag2.py +2 -2
  4. openlit/instrumentation/ai21/__init__.py +4 -4
  5. openlit/instrumentation/ai21/ai21.py +370 -319
  6. openlit/instrumentation/ai21/async_ai21.py +371 -319
  7. openlit/instrumentation/anthropic/__init__.py +4 -4
  8. openlit/instrumentation/anthropic/anthropic.py +321 -189
  9. openlit/instrumentation/anthropic/async_anthropic.py +323 -190
  10. openlit/instrumentation/assemblyai/__init__.py +1 -1
  11. openlit/instrumentation/assemblyai/assemblyai.py +59 -43
  12. openlit/instrumentation/astra/astra.py +4 -4
  13. openlit/instrumentation/astra/async_astra.py +4 -4
  14. openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
  15. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
  16. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
  17. openlit/instrumentation/bedrock/__init__.py +1 -1
  18. openlit/instrumentation/bedrock/bedrock.py +115 -58
  19. openlit/instrumentation/chroma/chroma.py +4 -4
  20. openlit/instrumentation/cohere/__init__.py +33 -10
  21. openlit/instrumentation/cohere/async_cohere.py +610 -0
  22. openlit/instrumentation/cohere/cohere.py +410 -219
  23. openlit/instrumentation/controlflow/controlflow.py +2 -2
  24. openlit/instrumentation/crawl4ai/async_crawl4ai.py +2 -2
  25. openlit/instrumentation/crawl4ai/crawl4ai.py +2 -2
  26. openlit/instrumentation/crewai/crewai.py +2 -2
  27. openlit/instrumentation/dynamiq/dynamiq.py +2 -2
  28. openlit/instrumentation/elevenlabs/async_elevenlabs.py +73 -47
  29. openlit/instrumentation/elevenlabs/elevenlabs.py +73 -52
  30. openlit/instrumentation/embedchain/embedchain.py +4 -4
  31. openlit/instrumentation/firecrawl/firecrawl.py +2 -2
  32. openlit/instrumentation/google_ai_studio/__init__.py +9 -9
  33. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
  34. openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
  35. openlit/instrumentation/gpt4all/gpt4all.py +17 -17
  36. openlit/instrumentation/groq/async_groq.py +14 -14
  37. openlit/instrumentation/groq/groq.py +14 -14
  38. openlit/instrumentation/haystack/haystack.py +2 -2
  39. openlit/instrumentation/julep/async_julep.py +2 -2
  40. openlit/instrumentation/julep/julep.py +2 -2
  41. openlit/instrumentation/langchain/langchain.py +36 -31
  42. openlit/instrumentation/letta/letta.py +6 -6
  43. openlit/instrumentation/litellm/async_litellm.py +20 -20
  44. openlit/instrumentation/litellm/litellm.py +20 -20
  45. openlit/instrumentation/llamaindex/llamaindex.py +2 -2
  46. openlit/instrumentation/mem0/mem0.py +2 -2
  47. openlit/instrumentation/milvus/milvus.py +4 -4
  48. openlit/instrumentation/mistral/async_mistral.py +18 -18
  49. openlit/instrumentation/mistral/mistral.py +18 -18
  50. openlit/instrumentation/multion/async_multion.py +2 -2
  51. openlit/instrumentation/multion/multion.py +2 -2
  52. openlit/instrumentation/ollama/async_ollama.py +29 -29
  53. openlit/instrumentation/ollama/ollama.py +29 -29
  54. openlit/instrumentation/openai/__init__.py +11 -230
  55. openlit/instrumentation/openai/async_openai.py +434 -409
  56. openlit/instrumentation/openai/openai.py +415 -393
  57. openlit/instrumentation/phidata/phidata.py +2 -2
  58. openlit/instrumentation/pinecone/pinecone.py +4 -4
  59. openlit/instrumentation/premai/premai.py +20 -20
  60. openlit/instrumentation/qdrant/async_qdrant.py +4 -4
  61. openlit/instrumentation/qdrant/qdrant.py +4 -4
  62. openlit/instrumentation/reka/async_reka.py +6 -6
  63. openlit/instrumentation/reka/reka.py +6 -6
  64. openlit/instrumentation/together/async_together.py +18 -18
  65. openlit/instrumentation/together/together.py +18 -18
  66. openlit/instrumentation/transformers/transformers.py +6 -6
  67. openlit/instrumentation/vertexai/async_vertexai.py +53 -53
  68. openlit/instrumentation/vertexai/vertexai.py +53 -53
  69. openlit/instrumentation/vllm/vllm.py +6 -6
  70. openlit/otel/metrics.py +98 -7
  71. openlit/semcov/__init__.py +113 -80
  72. {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/METADATA +1 -1
  73. openlit-1.33.9.dist-info/RECORD +121 -0
  74. {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/WHEEL +1 -1
  75. openlit/instrumentation/openai/async_azure_openai.py +0 -900
  76. openlit/instrumentation/openai/azure_openai.py +0 -898
  77. openlit-1.33.8.dist-info/RECORD +0 -122
  78. {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/LICENSE +0 -0
@@ -32,7 +32,7 @@ class BedrockInstrumentor(BaseInstrumentor):
32
32
  wrap_function_wrapper(
33
33
  "botocore.client",
34
34
  "ClientCreator.create_client",
35
- converse("bedrock.converse", version, environment, application_name,
35
+ converse(version, environment, application_name,
36
36
  tracer, pricing_info, trace_content, metrics, disable_metrics),
37
37
  )
38
38
 
@@ -1,17 +1,22 @@
1
- # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, protected-access, too-many-branches
2
1
  """
3
2
  Module for monitoring Amazon Bedrock API calls.
4
3
  """
5
4
 
6
5
  import logging
6
+ import time
7
7
  from botocore.response import StreamingBody
8
8
  from botocore.exceptions import ReadTimeoutError, ResponseStreamingError
9
9
  from urllib3.exceptions import ProtocolError as URLLib3ProtocolError
10
10
  from urllib3.exceptions import ReadTimeoutError as URLLib3ReadTimeoutError
11
11
  from opentelemetry.trace import SpanKind, Status, StatusCode
12
- from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
13
- from openlit.__helpers import get_chat_model_cost
14
- from openlit.__helpers import handle_exception
12
+ from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
13
+ from openlit.__helpers import (
14
+ get_chat_model_cost,
15
+ handle_exception,
16
+ response_as_dict,
17
+ create_metrics_attributes,
18
+ set_server_address_and_port
19
+ )
15
20
  from openlit.semcov import SemanticConvetion
16
21
 
17
22
  # Initialize logger for logging potential issues and operations
@@ -47,8 +52,7 @@ class CustomStreamWrapper(StreamingBody):
47
52
 
48
53
  return data_chunk
49
54
 
50
-
51
- def converse(gen_ai_endpoint, version, environment, application_name, tracer,
55
+ def converse(version, environment, application_name, tracer,
52
56
  pricing_info, trace_content, metrics, disable_metrics):
53
57
  """
54
58
  Generates a telemetry wrapper for messages to collect metrics.
@@ -91,52 +95,104 @@ def converse(gen_ai_endpoint, version, environment, application_name, tracer,
91
95
  Returns:
92
96
  The modified response with telemetry.
93
97
  """
94
- with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
98
+
99
+ server_address, server_port = set_server_address_and_port(instance, 'aws.amazon.com', 443)
100
+ request_model = method_kwargs.get('modelId', 'amazon.titan-text-express-v1')
101
+
102
+ span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}'
103
+
104
+ with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
105
+ start_time = time.time()
95
106
  response = original_method(*method_args, **method_kwargs)
107
+ end_time = time.time()
108
+
109
+ response_dict = response_as_dict(response)
96
110
 
97
111
  try:
98
- message_prompt = method_kwargs.get("messages", "")
112
+ message_prompt = method_kwargs.get('messages', '')
99
113
  formatted_messages = []
100
114
  for message in message_prompt:
101
- role = message["role"]
102
- content = message["content"]
115
+ role = message['role']
116
+ content = message['content']
103
117
 
104
118
  if isinstance(content, list):
105
- # pylint: disable=line-too-long
106
119
  content_str = ", ".join(f'text: {item["text"]}' for item in content if "text" in item)
107
- formatted_messages.append(f"{role}: {content_str}")
120
+ formatted_messages.append(f'{role}: {content_str}')
108
121
  else:
109
- formatted_messages.append(f"{role}: {content}")
110
- prompt = "\n".join(formatted_messages)
122
+ formatted_messages.append(f'{role}: {content}')
123
+ prompt = '\n'.join(formatted_messages)
124
+
125
+ input_tokens = response_dict.get('usage').get('inputTokens')
126
+ output_tokens = response_dict.get('usage').get('outputTokens')
111
127
 
112
- model = method_kwargs.get("modelId", "amazon.titan-text-express-v1")
113
- input_tokens = response["usage"]["inputTokens"]
114
- output_tokens = response["usage"]["outputTokens"]
128
+ # Calculate cost of the operation
129
+ cost = get_chat_model_cost(request_model, pricing_info,
130
+ input_tokens, output_tokens)
131
+
132
+ llm_response = response_dict.get('output').get('message').get('content')[0].get('text')
115
133
 
116
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
134
+ # Set base span attributes (OTel Semconv)
135
+ span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
136
+ span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
137
+ SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
117
138
  span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
118
- SemanticConvetion.GEN_AI_SYSTEM_BEDROCK)
119
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
120
- gen_ai_endpoint)
121
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
122
- environment)
123
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
124
- application_name)
139
+ SemanticConvetion.GEN_AI_SYSTEM_AWS_BEDROCK)
125
140
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
126
- model)
127
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
141
+ request_model)
142
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
143
+ server_port)
144
+
145
+ inference_config = method_kwargs.get('inferenceConfig', {})
146
+
147
+ # List of attributes and their config keys
148
+ attributes = [
149
+ (SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY, 'frequencyPenalty'),
150
+ (SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS, 'maxTokens'),
151
+ (SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY, 'presencePenalty'),
152
+ (SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES, 'stopSequences'),
153
+ (SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE, 'temperature'),
154
+ (SemanticConvetion.GEN_AI_REQUEST_TOP_P, 'topP'),
155
+ (SemanticConvetion.GEN_AI_REQUEST_TOP_K, 'topK'),
156
+ ]
157
+
158
+ # Set each attribute if the corresponding value exists and is not None
159
+ for attribute, key in attributes:
160
+ value = inference_config.get(key)
161
+ if value is not None:
162
+ span.set_attribute(attribute, value)
163
+
164
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
165
+ response_dict.get('ResponseMetadata').get('RequestId'))
166
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
167
+ request_model)
168
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
128
169
  input_tokens)
129
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
170
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
130
171
  output_tokens)
172
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
173
+ server_address)
174
+ if isinstance(llm_response, str):
175
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
176
+ 'text')
177
+ else:
178
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
179
+ 'json')
180
+
181
+ # Set base span attributes (Extras)
182
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
183
+ environment)
184
+ span.set_attribute(SERVICE_NAME,
185
+ application_name)
186
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
187
+ False)
131
188
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
132
189
  input_tokens + output_tokens)
133
-
134
- # Calculate cost of the operation
135
- cost = get_chat_model_cost(model,
136
- pricing_info, input_tokens,
137
- output_tokens)
138
190
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
139
- cost)
191
+ cost)
192
+ span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
193
+ end_time - start_time)
194
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
195
+ version)
140
196
 
141
197
  if trace_content:
142
198
  span.add_event(
@@ -148,42 +204,43 @@ def converse(gen_ai_endpoint, version, environment, application_name, tracer,
148
204
  span.add_event(
149
205
  name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
150
206
  attributes={
151
- # pylint: disable=line-too-long
152
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response["output"]["message"]["content"][0]["text"],
207
+ SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llm_response,
153
208
  },
154
209
  )
155
210
 
156
211
  span.set_status(Status(StatusCode.OK))
157
212
 
158
213
  if disable_metrics is False:
159
- attributes = {
160
- TELEMETRY_SDK_NAME:
161
- "openlit",
162
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
163
- application_name,
164
- SemanticConvetion.GEN_AI_SYSTEM:
165
- SemanticConvetion.GEN_AI_SYSTEM_BEDROCK,
166
- SemanticConvetion.GEN_AI_ENVIRONMENT:
167
- environment,
168
- SemanticConvetion.GEN_AI_TYPE:
169
- SemanticConvetion.GEN_AI_TYPE_CHAT,
170
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
171
- model
172
- }
173
-
174
- metrics["genai_requests"].add(1, attributes)
175
- metrics["genai_total_tokens"].add(
214
+ attributes = create_metrics_attributes(
215
+ service_name=application_name,
216
+ deployment_environment=environment,
217
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
218
+ system=SemanticConvetion.GEN_AI_SYSTEM_AWS_BEDROCK,
219
+ request_model=request_model,
220
+ server_address=server_address,
221
+ server_port=server_port,
222
+ response_model=request_model,
223
+ )
224
+
225
+ metrics['genai_client_usage_tokens'].record(
176
226
  input_tokens + output_tokens, attributes
177
227
  )
178
- metrics["genai_completion_tokens"].add(output_tokens, attributes)
179
- metrics["genai_prompt_tokens"].add(input_tokens, attributes)
180
- metrics["genai_cost"].record(cost, attributes)
228
+ metrics['genai_client_operation_duration'].record(
229
+ end_time - start_time, attributes
230
+ )
231
+ metrics['genai_server_ttft'].record(
232
+ end_time - start_time, attributes
233
+ )
234
+ metrics['genai_requests'].add(1, attributes)
235
+ metrics['genai_completion_tokens'].add(output_tokens, attributes)
236
+ metrics['genai_prompt_tokens'].add(input_tokens, attributes)
237
+ metrics['genai_cost'].record(cost, attributes)
181
238
 
182
239
  return response
183
240
 
184
241
  except Exception as e:
185
242
  handle_exception(span, e)
186
- logger.error("Error in trace creation: %s", e)
243
+ logger.error('Error in trace creation: %s', e)
187
244
 
188
245
  # Return original response
189
246
  return response
@@ -192,7 +249,7 @@ def converse(gen_ai_endpoint, version, environment, application_name, tracer,
192
249
  client = wrapped(*args, **kwargs)
193
250
 
194
251
  # Replace the original method with the instrumented one
195
- if kwargs.get("service_name") == "bedrock-runtime":
252
+ if kwargs.get('service_name') == 'bedrock-runtime':
196
253
  original_invoke_model = client.converse
197
254
  client.converse = lambda *args, **kwargs: converse_wrapper(original_invoke_model,
198
255
  *args, **kwargs)
@@ -77,8 +77,8 @@ def general_wrap(gen_ai_endpoint, version, environment, application_name,
77
77
  environment)
78
78
  span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
79
79
  application_name)
80
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
81
- SemanticConvetion.GEN_AI_TYPE_VECTORDB)
80
+ span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
81
+ SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB)
82
82
  span.set_attribute(SemanticConvetion.DB_SYSTEM,
83
83
  SemanticConvetion.DB_SYSTEM_CHROMA)
84
84
  span.set_attribute(SemanticConvetion.DB_COLLECTION_NAME,
@@ -179,8 +179,8 @@ def general_wrap(gen_ai_endpoint, version, environment, application_name,
179
179
  SemanticConvetion.DB_SYSTEM_CHROMA,
180
180
  SemanticConvetion.GEN_AI_ENVIRONMENT:
181
181
  environment,
182
- SemanticConvetion.GEN_AI_TYPE:
183
- SemanticConvetion.GEN_AI_TYPE_VECTORDB,
182
+ SemanticConvetion.GEN_AI_OPERATION:
183
+ SemanticConvetion.GEN_AI_OPERATION_TYPE_VECTORDB,
184
184
  SemanticConvetion.DB_OPERATION:
185
185
  db_operation
186
186
  }
@@ -6,8 +6,10 @@ from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
6
6
  from wrapt import wrap_function_wrapper
7
7
 
8
8
  from openlit.instrumentation.cohere.cohere import chat, chat_stream, embed
9
+ from openlit.instrumentation.cohere.async_cohere import async_chat, async_chat_stream, async_embed
9
10
 
10
- _instruments = ("cohere >= 5.3.2",)
11
+
12
+ _instruments = ("cohere >= 5.14.0",)
11
13
 
12
14
  class CohereInstrumentor(BaseInstrumentor):
13
15
  """An instrumentor for Cohere's client library."""
@@ -25,24 +27,45 @@ class CohereInstrumentor(BaseInstrumentor):
25
27
  disable_metrics = kwargs.get("disable_metrics")
26
28
  version = importlib.metadata.version("cohere")
27
29
 
30
+ # Sync Client
31
+ wrap_function_wrapper(
32
+ "cohere.client_v2",
33
+ "ClientV2.chat",
34
+ chat(version, environment, application_name,
35
+ tracer, pricing_info, trace_content, metrics, disable_metrics),
36
+ )
37
+ wrap_function_wrapper(
38
+ "cohere.client_v2",
39
+ "ClientV2.chat_stream",
40
+ chat_stream(version, environment, application_name,
41
+ tracer, pricing_info, trace_content, metrics, disable_metrics),
42
+ )
43
+ wrap_function_wrapper(
44
+ "cohere.client_v2",
45
+ "ClientV2.embed",
46
+ embed(version, environment, application_name,
47
+ tracer, pricing_info, trace_content, metrics, disable_metrics),
48
+ )
49
+
50
+ # Async Client
28
51
  wrap_function_wrapper(
29
- "cohere.client",
30
- "Client.chat",
31
- chat("cohere.chat", version, environment, application_name,
52
+ "cohere.client_v2",
53
+ "AsyncClientV2.chat",
54
+ async_chat(version, environment, application_name,
32
55
  tracer, pricing_info, trace_content, metrics, disable_metrics),
33
56
  )
34
57
 
35
58
  wrap_function_wrapper(
36
- "cohere.client",
37
- "Client.chat_stream",
38
- chat_stream("cohere.chat", version, environment, application_name,
59
+ "cohere.client_v2",
60
+ "AsyncClientV2.chat_stream",
61
+ async_chat_stream(version, environment, application_name,
39
62
  tracer, pricing_info, trace_content, metrics, disable_metrics),
40
63
  )
41
64
 
42
65
  wrap_function_wrapper(
43
- "cohere.client",
44
- "Client.embed",
45
- embed("cohere.embed", version, environment, application_name,
66
+ "cohere.client_v2",
67
+ "AsyncClientV2.embed",
68
+ async_embed(version, environment, application_name,
46
69
  tracer, pricing_info, trace_content, metrics, disable_metrics),
47
70
  )
48
71