openlit 1.33.8__py3-none-any.whl → 1.33.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. openlit/__helpers.py +83 -0
  2. openlit/__init__.py +1 -1
  3. openlit/instrumentation/ag2/ag2.py +2 -2
  4. openlit/instrumentation/ai21/__init__.py +4 -4
  5. openlit/instrumentation/ai21/ai21.py +370 -319
  6. openlit/instrumentation/ai21/async_ai21.py +371 -319
  7. openlit/instrumentation/anthropic/__init__.py +4 -4
  8. openlit/instrumentation/anthropic/anthropic.py +321 -189
  9. openlit/instrumentation/anthropic/async_anthropic.py +323 -190
  10. openlit/instrumentation/assemblyai/__init__.py +1 -1
  11. openlit/instrumentation/assemblyai/assemblyai.py +59 -43
  12. openlit/instrumentation/astra/astra.py +4 -4
  13. openlit/instrumentation/astra/async_astra.py +4 -4
  14. openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
  15. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
  16. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
  17. openlit/instrumentation/bedrock/__init__.py +1 -1
  18. openlit/instrumentation/bedrock/bedrock.py +115 -58
  19. openlit/instrumentation/chroma/chroma.py +4 -4
  20. openlit/instrumentation/cohere/__init__.py +33 -10
  21. openlit/instrumentation/cohere/async_cohere.py +610 -0
  22. openlit/instrumentation/cohere/cohere.py +410 -219
  23. openlit/instrumentation/controlflow/controlflow.py +2 -2
  24. openlit/instrumentation/crawl4ai/async_crawl4ai.py +2 -2
  25. openlit/instrumentation/crawl4ai/crawl4ai.py +2 -2
  26. openlit/instrumentation/crewai/crewai.py +2 -2
  27. openlit/instrumentation/dynamiq/dynamiq.py +2 -2
  28. openlit/instrumentation/elevenlabs/async_elevenlabs.py +73 -47
  29. openlit/instrumentation/elevenlabs/elevenlabs.py +73 -52
  30. openlit/instrumentation/embedchain/embedchain.py +4 -4
  31. openlit/instrumentation/firecrawl/firecrawl.py +2 -2
  32. openlit/instrumentation/google_ai_studio/__init__.py +9 -9
  33. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
  34. openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
  35. openlit/instrumentation/gpt4all/gpt4all.py +17 -17
  36. openlit/instrumentation/groq/async_groq.py +14 -14
  37. openlit/instrumentation/groq/groq.py +14 -14
  38. openlit/instrumentation/haystack/haystack.py +2 -2
  39. openlit/instrumentation/julep/async_julep.py +2 -2
  40. openlit/instrumentation/julep/julep.py +2 -2
  41. openlit/instrumentation/langchain/langchain.py +36 -31
  42. openlit/instrumentation/letta/letta.py +6 -6
  43. openlit/instrumentation/litellm/async_litellm.py +20 -20
  44. openlit/instrumentation/litellm/litellm.py +20 -20
  45. openlit/instrumentation/llamaindex/llamaindex.py +2 -2
  46. openlit/instrumentation/mem0/mem0.py +2 -2
  47. openlit/instrumentation/milvus/milvus.py +4 -4
  48. openlit/instrumentation/mistral/async_mistral.py +18 -18
  49. openlit/instrumentation/mistral/mistral.py +18 -18
  50. openlit/instrumentation/multion/async_multion.py +2 -2
  51. openlit/instrumentation/multion/multion.py +2 -2
  52. openlit/instrumentation/ollama/async_ollama.py +29 -29
  53. openlit/instrumentation/ollama/ollama.py +29 -29
  54. openlit/instrumentation/openai/__init__.py +11 -230
  55. openlit/instrumentation/openai/async_openai.py +434 -409
  56. openlit/instrumentation/openai/openai.py +415 -393
  57. openlit/instrumentation/phidata/phidata.py +2 -2
  58. openlit/instrumentation/pinecone/pinecone.py +4 -4
  59. openlit/instrumentation/premai/premai.py +20 -20
  60. openlit/instrumentation/qdrant/async_qdrant.py +4 -4
  61. openlit/instrumentation/qdrant/qdrant.py +4 -4
  62. openlit/instrumentation/reka/async_reka.py +6 -6
  63. openlit/instrumentation/reka/reka.py +6 -6
  64. openlit/instrumentation/together/async_together.py +18 -18
  65. openlit/instrumentation/together/together.py +18 -18
  66. openlit/instrumentation/transformers/transformers.py +6 -6
  67. openlit/instrumentation/vertexai/async_vertexai.py +53 -53
  68. openlit/instrumentation/vertexai/vertexai.py +53 -53
  69. openlit/instrumentation/vllm/vllm.py +6 -6
  70. openlit/otel/metrics.py +98 -7
  71. openlit/semcov/__init__.py +113 -80
  72. {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/METADATA +1 -1
  73. openlit-1.33.9.dist-info/RECORD +121 -0
  74. {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/WHEEL +1 -1
  75. openlit/instrumentation/openai/async_azure_openai.py +0 -900
  76. openlit/instrumentation/openai/azure_openai.py +0 -898
  77. openlit-1.33.8.dist-info/RECORD +0 -122
  78. {openlit-1.33.8.dist-info → openlit-1.33.9.dist-info}/LICENSE +0 -0
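The diff below is for openlit/instrumentation/cohere/cohere.py. The rewritten wrappers drop the gen_ai_endpoint parameter, name spans "{operation} {model}", adopt OTel GenAI semantic-convention attributes (SERVICE_NAME, DEPLOYMENT_ENVIRONMENT, server address/port, input/output token counts), and record new client-side duration, TTFT, and TBT metrics. A minimal sketch of application code that would exercise these wrappers (illustrative only: it assumes a configured exporter and a CO_API_KEY in the environment, and uses the Cohere v2 client whose event types the new code targets; the model name and messages are placeholders):

    import cohere
    import openlit

    # Enable OpenLIT instrumentation (Cohere is patched on init)
    openlit.init(application_name="demo-app", environment="staging")

    co = cohere.ClientV2()  # reads CO_API_KEY from the environment

    # Non-streaming chat: handled by the rewritten chat() wrapper below,
    # which would emit a span named something like "chat command-r-plus-08-2024"
    response = co.chat(
        model="command-r-plus-08-2024",
        messages=[{"role": "user", "content": "Say hello"}],
    )

    # Streaming chat: wrapped in TracedSyncStream, which aggregates chunks
    # and records TTFT/TBT once the stream is exhausted
    for event in co.chat_stream(
        model="command-r-plus-08-2024",
        messages=[{"role": "user", "content": "Say hello"}],
    ):
        pass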
openlit/instrumentation/cohere/cohere.py
@@ -1,29 +1,37 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
 """
 Module for monitoring Cohere API calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
-from openlit.__helpers import get_chat_model_cost, get_embed_model_cost, handle_exception
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from openlit.__helpers import (
+    get_chat_model_cost,
+    get_embed_model_cost,
+    handle_exception,
+    response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
+)
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def embed(gen_ai_endpoint, version, environment, application_name, tracer,
+def embed(version, environment, application_name, tracer,
           pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for embeddings to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the OpenAI API.
+        application_name: Name of the application using the Cohere API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of OpenAI usage.
+        pricing_info: Information used for calculating the cost of Cohere usage.
         trace_content: Flag indicating whether to trace the actual content.
 
     Returns:
@@ -47,80 +55,85 @@ def embed(gen_ai_endpoint, version, environment, application_name, tracer,
         The response from the original 'embed' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-            response = wrapped(*args, **kwargs)
+        server_address, server_port = set_server_address_and_port(instance, "api.cohere.com", 443)
+        request_model = kwargs.get("model", "embed-english-v3.0")
 
-            try:
-                # Get prompt from kwargs and store as a single string
-                prompt = " ".join(kwargs.get("texts", []))
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
 
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
+            response = wrapped(*args, **kwargs)
+            end_time = time.time()
 
+            response_dict = response_as_dict(response)
+            try:
+                input_tokens = response_dict.get('meta').get('billed_units').get('input_tokens')
                 # Calculate cost of the operation
                 cost = get_embed_model_cost(kwargs.get("model", "embed-english-v2.0"),
-                                            pricing_info,
-                                            response.meta.billed_units.input_tokens)
+                                            pricing_info, input_tokens)
 
-                # Set Span attributes
+                # Set Span attributes (OTel Semconv)
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                    SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                     SemanticConvetion.GEN_AI_SYSTEM_COHERE)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                    SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                    gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                    request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
+                                    kwargs.get('embedding_types', ['float']))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                    request_model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                    server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                    server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                    input_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                    response_dict.get('response_type'))
+
+                # Set Span attributes (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                     environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                                     application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                    kwargs.get("model", "embed-english-v2.0"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
-                                    kwargs.get("embedding_types", "float"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
-                                    kwargs.get("input_type", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
-                                    kwargs.get("user", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                    response.id)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                    response.meta.billed_units.input_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                    response.meta.billed_units.input_tokens)
+                                    input_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                    version)
+
                 if trace_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                         attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: str(kwargs.get("texts", "")),
                         },
                     )
 
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_COHERE,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "embed-english-v2.0")
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(
-                        response.meta.billed_units.input_tokens, attributes
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_COHERE,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
                     )
-                    metrics["genai_prompt_tokens"].add(
-                        response.meta.billed_units.input_tokens, attributes
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
                     )
+                    metrics["genai_requests"].add(1, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                     metrics["genai_cost"].record(cost, attributes)
 
             # Return original response
@@ -135,18 +148,17 @@ def embed(gen_ai_endpoint, version, environment, application_name, tracer,
 
     return wrapper
 
-def chat(gen_ai_endpoint, version, environment, application_name, tracer,
+def chat(version, environment, application_name, tracer,
          pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
        version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the OpenAI API.
+        application_name: Name of the application using the Cohere API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of OpenAI usage.
+        pricing_info: Information used for calculating the cost of Cohere usage.
         trace_content: Flag indicating whether to trace the actual content.
 
     Returns:
@@ -170,96 +182,145 @@ def chat(gen_ai_endpoint, version, environment, application_name, tracer,
         The response from the original 'chat' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.cohere.com", 443)
+        request_model = kwargs.get("model", "command-r-plus-08-2024")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
+            end_time = time.time()
+
+            response_dict = response_as_dict(response)
 
             try:
+                # Format 'messages' into a single string
+                message_prompt = kwargs.get("messages", "")
+                formatted_messages = []
+                for message in message_prompt:
+                    role = message["role"]
+                    content = message["content"]
+
+                    if isinstance(content, list):
+                        content_str = ", ".join(
+                            f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                            if "type" in item else f'text: {item["text"]}'
+                            for item in content
+                        )
+                        formatted_messages.append(f"{role}: {content_str}")
+                    else:
+                        formatted_messages.append(f"{role}: {content}")
+                prompt = "\n".join(formatted_messages)
+
+                input_tokens = response_dict.get('usage').get('billed_units').get('input_tokens')
+                output_tokens = response_dict.get('usage').get('billed_units').get('output_tokens')
+
                 # Calculate cost of the operation
-                cost = get_chat_model_cost(kwargs.get("model", "command"),
-                                            pricing_info,
-                                            response.meta.billed_units.input_tokens,
-                                            response.meta.billed_units.output_tokens)
+                cost = get_chat_model_cost(request_model, pricing_info,
+                                            input_tokens, output_tokens)
+
+                llm_response = response_dict.get('message').get('content')[0].get('text')
 
-                # Set Span attributes
+                # Set base span attribues (OTel Semconv)
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                    SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                     SemanticConvetion.GEN_AI_SYSTEM_COHERE)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                    SemanticConvetion.GEN_AI_TYPE_CHAT)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                    gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                    application_name)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                    kwargs.get("model", "command"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                    kwargs.get("temperature", 0.3))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                    kwargs.get("max_tokens", -1))
+                                    request_model)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
                                     kwargs.get("seed", ""))
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                    server_port)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
                                     kwargs.get("frequency_penalty", 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                    kwargs.get("max_tokens", -1))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
                                     kwargs.get("presence_penalty", 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                    kwargs.get("stop_sequences", []))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                    kwargs.get("temperature", 0.3))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
+                                    kwargs.get("k", 1.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                    kwargs.get("p", 1.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                    response_dict.get("id"))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                    request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                    input_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                    output_tokens)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                    server_address)
+                if isinstance(llm_response, str):
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                        "text")
+                else:
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                        "json")
+
+                # Set base span attribues (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                    environment)
+                span.set_attribute(SERVICE_NAME,
+                                    application_name)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                     False)
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                    response.generation_id)
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                    [response.finish_reason])
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                    response.meta.billed_units.input_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                    response.meta.billed_units.output_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                    response.meta.billed_units.input_tokens +
-                                    response.meta.billed_units.output_tokens)
+                                    input_tokens + output_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                    end_time - start_time)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                    version)
 
                 if trace_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                         attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("message", ""),
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
                         },
                     )
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                         attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.text,
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llm_response,
                         },
                     )
 
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_COHERE,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "command")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_COHERE,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
 
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens + output_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics["genai_server_ttft"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(
-                        response.meta.billed_units.input_tokens +
-                        response.meta.billed_units.output_tokens, attributes)
-                    metrics["genai_completion_tokens"].add(
-                        response.meta.billed_units.output_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(
-                        response.meta.billed_units.input_tokens, attributes)
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                     metrics["genai_cost"].record(cost, attributes)
 
             # Return original response
@@ -274,18 +335,17 @@ def chat(gen_ai_endpoint, version, environment, application_name, tracer,
 
     return wrapper
 
-def chat_stream(gen_ai_endpoint, version, environment, application_name,
+def chat_stream(version, environment, application_name,
                 tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat_stream to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the OpenAI API.
+        application_name: Name of the application using the Cohere API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of OpenAI usage.
+        pricing_info: Information used for calculating the cost of Cohere usage.
         trace_content: Flag indicating whether to trace the actual content.
 
     Returns:
@@ -309,111 +369,242 @@ def chat_stream(gen_ai_endpoint, version, environment, application_name,
         The response from the original 'chat_stream' method.
         """
 
-        def stream_generator():
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        class TracedSyncStream:
+            """
+            Wrapper for streaming responses to collect metrics and trace data.
+            Wraps the 'cohere.AsyncStream' response to collect message IDs and aggregated response.
+
+            This class implements the '__aiter__' and '__anext__' methods that
+            handle asynchronous streaming responses.
+
+            This class also implements '__aenter__' and '__aexit__' methods that
+            handle asynchronous context management protocol.
+            """
+            def __init__(
+                    self,
+                    wrapped,
+                    span,
+                    kwargs,
+                    server_address,
+                    server_port,
+                    **args,
+                ):
+                self.__wrapped__ = wrapped
+                self._span = span
                 # Placeholder for aggregating streaming response
-                llmresponse = ""
-
-                # Loop through streaming events capturing relevant details
-                for event in wrapped(*args, **kwargs):
-                    # Collect message IDs and aggregated response from events
-                    if event.event_type == "stream-end":
-                        llmresponse = event.response.text
-                        prompt_tokens = event.response.meta.billed_units.input_tokens
-                        completion_tokens = event.response.meta.billed_units.output_tokens
-                        finish_reason = event.finish_reason
-                    if event.event_type == "stream-start":
-                        response_id = event.generation_id
-                    yield event
-
-            # Handling exception ensure observability without disrupting operation
+                self._llmresponse = ""
+                self._response_id = ""
+                self._finish_reason = ""
+                self._input_tokens = ""
+                self._output_tokens = ""
+
+                self._args = args
+                self._kwargs = kwargs
+                self._start_time = time.time()
+                self._end_time = None
+                self._timestamps = []
+                self._ttft = 0
+                self._tbt = 0
+                self._server_address = server_address
+                self._server_port = server_port
+
+            def __enter__(self):
+                self.__wrapped__.__enter__()
+                return self
+
+            def __exit__(self, exc_type, exc_value, traceback):
+                self.__wrapped__.__exit__(exc_type, exc_value, traceback)
+
+            def __iter__(self):
+                return self
+
+            def __getattr__(self, name):
+                """Delegate attribute access to the wrapped object."""
+                return getattr(self.__wrapped__, name)
+
+            def __next__(self):
                 try:
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "command"),
-                                                pricing_info, prompt_tokens, completion_tokens)
-
-                    # Set Span attributes
-                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                        SemanticConvetion.GEN_AI_SYSTEM_COHERE)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                        SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                        gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                        kwargs.get("model", "command"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                        kwargs.get("temperature", 0.3))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                        kwargs.get("max_tokens", -1))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                        kwargs.get("seed", ""))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                        kwargs.get("frequency_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                        kwargs.get("presence_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                        True)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                        response_id)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                        [finish_reason])
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                        prompt_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                        completion_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                        prompt_tokens + completion_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                        cost)
-                    if trace_content:
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("message", ""),
-                            },
-                        )
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
-                            },
-                        )
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_COHERE,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "command")
-                        }
-
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(
-                            prompt_tokens + completion_tokens, attributes
-                        )
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
-
-                except Exception as e:
-                    handle_exception(span, e)
-                    logger.error("Error in trace creation: %s", e)
-
-        return stream_generator()
+                    chunk = self.__wrapped__.__next__()
+                    end_time = time.time()
+                    # Record the timestamp for the current chunk
+                    self._timestamps.append(end_time)
+
+                    if len(self._timestamps) == 1:
+                        # Calculate time to first chunk
+                        self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
+                    chunked = response_as_dict(chunk)
+
+                    if chunked.get('type') == 'message-start':
+                        self._response_id = chunked.get('id')
+
+                    if chunked.get('type') == 'content-delta':
+                        content = chunked.get('delta').get('message').get('text')
+                        if content:
+                            self._llmresponse += content
+
+                    if chunked.get('type') == 'message-end':
+                        self._finish_reason = chunked.get('delta').get('finish_reason')
+                        self._input_tokens = chunked.get('delta').get('usage').get('billed_units').get('input_tokens')
+                        self._output_tokens = chunked.get('delta').get('usage').get('billed_units').get('output_tokens')
+
+                    return chunk
+                except StopIteration:
+                    # Handling exception ensure observability without disrupting operation
+                    try:
+                        self._end_time = time.time()
+                        if len(self._timestamps) > 1:
+                            self._tbt = calculate_tbt(self._timestamps)
+
+                        # Format 'messages' into a single string
+                        message_prompt = self._kwargs.get("messages", "")
+                        formatted_messages = []
+                        for message in message_prompt:
+                            role = message["role"]
+                            content = message["content"]
+
+                            if isinstance(content, list):
+                                content_str_list = []
+                                for item in content:
+                                    if item["type"] == "text":
+                                        content_str_list.append(f'text: {item["text"]}')
+                                    elif (item["type"] == "image_url" and
+                                          not item["image_url"]["url"].startswith("data:")):
+                                        content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+                                content_str = ", ".join(content_str_list)
+                                formatted_messages.append(f"{role}: {content_str}")
+                            else:
+                                formatted_messages.append(f"{role}: {content}")
+                        prompt = "\n".join(formatted_messages)
+
+                        request_model = self._kwargs.get("model", "command-r-plus")
+
+                        # Calculate cost of the operation
+                        cost = get_chat_model_cost(request_model,
+                                                    pricing_info, self._input_tokens,
+                                                    self._output_tokens)
+
+                        # Set Span attributes (OTel Semconv)
+                        self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                                  SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                                  SemanticConvetion.GEN_AI_SYSTEM_COHERE)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                                  request_model)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                                  self._kwargs.get("seed", ""))
+                        self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                                  self._server_port)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                                  self._kwargs.get("frequency_penalty", 0.0))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                                  self._kwargs.get("max_tokens", -1))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                                  self._kwargs.get("presence_penalty", 0.0))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                                  self._kwargs.get("stop_sequences", []))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                                  self._kwargs.get("temperature", 0.3))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_K,
+                                                  self._kwargs.get("k", 1.0))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                                  self._kwargs.get("p", 1.0))
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                                  [self._finish_reason])
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                                  self._response_id)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                                  request_model)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                                  self._input_tokens)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                                  self._output_tokens)
+                        self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                                  self._server_address)
+
+                        if isinstance(self._llmresponse, str):
+                            self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                      "text")
+                        else:
+                            self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                      "json")
+
+                        # Set Span attributes (Extra)
+                        self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                                  environment)
+                        self._span.set_attribute(SERVICE_NAME,
+                                                  application_name)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                                  True)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                                  self._input_tokens + self._output_tokens)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                                  cost)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                                  self._tbt)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                                  self._ttft)
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                                  version)
+                        if trace_content:
+                            self._span.add_event(
+                                name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                                attributes={
+                                    SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                                },
+                            )
+                            self._span.add_event(
+                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                                attributes={
+                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
+                                },
+                            )
+                        self._span.set_status(Status(StatusCode.OK))
+
+                        if disable_metrics is False:
+                            attributes = create_metrics_attributes(
+                                service_name=application_name,
+                                deployment_environment=environment,
+                                operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                                system=SemanticConvetion.GEN_AI_SYSTEM_COHERE,
+                                request_model=request_model,
+                                server_address=self._server_address,
+                                server_port=self._server_port,
+                                response_model=request_model,
+                            )
+
+                            metrics["genai_client_usage_tokens"].record(
+                                self._input_tokens + self._output_tokens, attributes
+                            )
+                            metrics["genai_client_operation_duration"].record(
+                                self._end_time - self._start_time, attributes
+                            )
+                            metrics["genai_server_tbt"].record(
+                                self._tbt, attributes
+                            )
+                            metrics["genai_server_ttft"].record(
+                                self._ttft, attributes
+                            )
+                            metrics["genai_requests"].add(1, attributes)
+                            metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
+                            metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
+                            metrics["genai_cost"].record(cost, attributes)
+
+                    except Exception as e:
+                        handle_exception(self._span, e)
+                        logger.error("Error in trace creation: %s", e)
+                    finally:
+                        self._span.end()
+                    raise
+
+        server_address, server_port = set_server_address_and_port(instance, "api.cohere.com", 443)
+        request_model = kwargs.get("model", "command-r-plus")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+
+        awaited_wrapped = wrapped(*args, **kwargs)
+        span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
+        return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
 
     return wrapper
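The calculate_ttft and calculate_tbt helpers imported at the top of this file are among the additions to openlit/__helpers.py (+83 lines, listed above). Their implementation is not shown in this diff; a plausible sketch, assuming they operate on the chunk timestamps gathered in __next__ (an illustration, not the package's exact code):

    def calculate_ttft(timestamps, start_time):
        # Time from request start to the first streamed chunk
        if timestamps:
            return timestamps[0] - start_time
        return 0

    def calculate_tbt(timestamps):
        # Average gap between consecutive streamed chunks
        if len(timestamps) > 1:
            gaps = [t2 - t1 for t1, t2 in zip(timestamps, timestamps[1:])]
            return sum(gaps) / len(gaps)
        return 0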