openlit 1.34.17__py3-none-any.whl → 1.34.19__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only.
@@ -1,185 +1,145 @@
 """
-Module for monitoring Cohere API calls.
+Module for monitoring Cohere API calls (async version).
 """

-import logging
 import time
-from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
-    get_chat_model_cost,
-    get_embed_model_cost,
     handle_exception,
-    response_as_dict,
-    calculate_ttft,
-    calculate_tbt,
-    create_metrics_attributes,
-    set_server_address_and_port
+    set_server_address_and_port,
+)
+from openlit.instrumentation.cohere.utils import (
+    process_chunk,
+    process_chat_response,
+    process_streaming_chat_response,
+    process_embedding_response,
 )
 from openlit.semcov import SemanticConvention

-# Initialize logger for logging potential issues and operations
-logger = logging.getLogger(__name__)
-
-def async_embed(version, environment, application_name, tracer,
-    pricing_info, capture_message_content, metrics, disable_metrics):
+def async_chat(version, environment, application_name,
+    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for embeddings to collect metrics.
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Cohere API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Cohere usage.
-        capture_message_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the embeddings method to add telemetry.
+    Generates a telemetry wrapper for GenAI chat function call
     """

     async def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'embed' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'embed' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'embed' method.
-            kwargs: Keyword arguments for the 'embed' method.
-
-        Returns:
-            The response from the original 'embed' method.
+        Wraps the GenAI chat function call.
         """

         server_address, server_port = set_server_address_and_port(instance, "api.cohere.com", 443)
-        request_model = kwargs.get("model", "mbed-english-v3.0")
+        request_model = kwargs.get("model", "command-r-plus-08-2024")

-        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

-        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
             start_time = time.time()
             response = await wrapped(*args, **kwargs)
-            end_time = time.time()
+            response = process_chat_response(
+                response=response,
+                request_model=request_model,
+                pricing_info=pricing_info,
+                server_port=server_port,
+                server_address=server_address,
+                environment=environment,
+                application_name=application_name,
+                metrics=metrics,
+                start_time=start_time,
+                span=span,
+                capture_message_content=capture_message_content,
+                disable_metrics=disable_metrics,
+                version=version,
+                **kwargs
+            )
+
+            return response

-            response_dict = response_as_dict(response)
-            try:
-                input_tokens = response_dict.get('meta').get('billed_units').get('input_tokens')
-                # Calculate cost of the operation
-                cost = get_embed_model_cost(kwargs.get("model", "embed-english-v2.0"),
-                    pricing_info, input_tokens)
-
-                # Set Span attributes (OTel Semconv)
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                    SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                    SemanticConvention.GEN_AI_SYSTEM_COHERE)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                    request_model)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_ENCODING_FORMATS,
-                    kwargs.get('embedding_types', ['float']))
-                span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                    request_model)
-                span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                    server_address)
-                span.set_attribute(SemanticConvention.SERVER_PORT,
-                    server_port)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                    input_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                    response_dict.get('response_type'))
-
-                # Set Span attributes (Extras)
-                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                    environment)
-                span.set_attribute(SERVICE_NAME,
-                    application_name)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                    input_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                    cost)
-                span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                    version)
-
-                if capture_message_content:
-                    span.add_event(
-                        name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvention.GEN_AI_CONTENT_PROMPT: str(kwargs.get("texts", "")),
-                        },
-                    )
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = create_metrics_attributes(
-                        service_name=application_name,
-                        deployment_environment=environment,
-                        operation=SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING,
-                        system=SemanticConvention.GEN_AI_SYSTEM_COHERE,
-                        request_model=request_model,
-                        server_address=server_address,
-                        server_port=server_port,
-                        response_model=request_model,
-                    )
-                    metrics["genai_client_usage_tokens"].record(
-                        input_tokens, attributes
-                    )
-                    metrics["genai_client_operation_duration"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
+    return wrapper

-                # Return original response
-                return response
+def async_chat_stream(version, environment, application_name,
+    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+    """
+    Generates a telemetry wrapper for GenAI chat_stream function call
+    """

-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
+    class TracedAsyncStream:
+        """
+        Wrapper for async streaming responses to collect telemetry.
+        """

-                # Return original response
-                return response
+        def __init__(
+            self,
+            wrapped,
+            span,
+            span_name,
+            kwargs,
+            server_address,
+            server_port,
+            **args,
+        ):
+            self.__wrapped__ = wrapped
+            self._span = span
+            self._span_name = span_name
+            self._llmresponse = ""
+            self._response_id = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._tools = None
+            self._tool_plan = ""
+            self._input_tokens = 0
+            self._output_tokens = 0
+
+            self._args = args
+            self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
+
+        async def __aenter__(self):
+            await self.__wrapped__.__aenter__()
+            return self
+
+        async def __aexit__(self, exc_type, exc_value, traceback):
+            await self.__wrapped__.__aexit__(exc_type, exc_value, traceback)
+
+        def __aiter__(self):
+            return self
+
+        async def __getattr__(self, name):
+            """Delegate attribute access to the wrapped object."""
+            return getattr(await self.__wrapped__, name)
+
+        async def __anext__(self):
+            try:
+                chunk = await self.__wrapped__.__anext__()
+                process_chunk(self, chunk)
+                return chunk
+            except StopAsyncIteration:
+                try:
+                    with tracer.start_as_current_span(self._span_name, kind=SpanKind.CLIENT) as self._span:
+                        process_streaming_chat_response(
+                            self,
+                            pricing_info=pricing_info,
+                            environment=environment,
+                            application_name=application_name,
+                            metrics=metrics,
+                            capture_message_content=capture_message_content,
+                            disable_metrics=disable_metrics,
+                            version=version
+                        )

-    return wrapper
+                except Exception as e:
+                    handle_exception(self._span, e)

-def async_chat(version, environment, application_name, tracer,
-    pricing_info, capture_message_content, metrics, disable_metrics):
-    """
-    Generates a telemetry wrapper for chat to collect metrics.
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Cohere API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Cohere usage.
-        capture_message_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the chat method to add telemetry.
-    """
+                raise

     async def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'chat' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'chat' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'chat' method.
-            kwargs: Keyword arguments for the 'chat' method.
-
-        Returns:
-            The response from the original 'chat' method.
+        Wraps the GenAI chat_stream function call.
         """

         server_address, server_port = set_server_address_and_port(instance, "api.cohere.com", 443)
@@ -187,424 +147,55 @@ def async_chat(version, environment, application_name, tracer,

         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

-        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
-            start_time = time.time()
-            response = await wrapped(*args, **kwargs)
-            end_time = time.time()
-
-            response_dict = response_as_dict(response)
-
-            try:
-                # Format 'messages' into a single string
-                message_prompt = kwargs.get("messages", "")
-                formatted_messages = []
-                for message in message_prompt:
-                    role = message["role"]
-                    content = message["content"]
-
-                    if isinstance(content, list):
-                        content_str = ", ".join(
-                            f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                            if "type" in item else f'text: {item["text"]}'
-                            for item in content
-                        )
-                        formatted_messages.append(f"{role}: {content_str}")
-                    else:
-                        formatted_messages.append(f"{role}: {content}")
-                prompt = "\n".join(formatted_messages)
-
-                input_tokens = response_dict.get('usage').get('billed_units').get('input_tokens')
-                output_tokens = response_dict.get('usage').get('billed_units').get('output_tokens')
-
-                # Calculate cost of the operation
-                cost = get_chat_model_cost(request_model, pricing_info,
-                    input_tokens, output_tokens)
-
-                llm_response = response_dict.get('message').get('content')[0].get('text')
-
-                # Set base span attribues (OTel Semconv)
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                    SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                    SemanticConvention.GEN_AI_SYSTEM_COHERE)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                    request_model)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED,
-                    kwargs.get("seed", ""))
-                span.set_attribute(SemanticConvention.SERVER_PORT,
-                    server_port)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                    kwargs.get("frequency_penalty", 0.0))
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
-                    kwargs.get("max_tokens", -1))
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                    kwargs.get("presence_penalty", 0.0))
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
-                    kwargs.get("stop_sequences", []))
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-                    kwargs.get("temperature", 0.3))
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K,
-                    kwargs.get("k", 1.0))
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
-                    kwargs.get("p", 1.0))
-                span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
-                    response_dict.get("id"))
-                span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                    request_model)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                    input_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                    output_tokens)
-                span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                    server_address)
-                if isinstance(llm_response, str):
-                    span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                        "text")
-                else:
-                    span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                        "json")
-
-                # Set base span attribues (Extras)
-                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                    environment)
-                span.set_attribute(SERVICE_NAME,
-                    application_name)
-                span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                    False)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                    input_tokens + output_tokens)
-                span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                    cost)
-                span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                    end_time - start_time)
-                span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                    version)
-
-                if capture_message_content:
-                    span.add_event(
-                        name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                        },
-                    )
-                    span.add_event(
-                        name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-                        attributes={
-                            SemanticConvention.GEN_AI_CONTENT_COMPLETION: llm_response,
-                        },
-                    )
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = create_metrics_attributes(
-                        service_name=application_name,
-                        deployment_environment=environment,
-                        operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                        system=SemanticConvention.GEN_AI_SYSTEM_COHERE,
-                        request_model=request_model,
-                        server_address=server_address,
-                        server_port=server_port,
-                        response_model=request_model,
-                    )
-
-                    metrics["genai_client_usage_tokens"].record(
-                        input_tokens + output_tokens, attributes
-                    )
-                    metrics["genai_client_operation_duration"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_server_ttft"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
-
-                # Return original response
-                return response
-
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
+        # Stream endpoint is always streaming
+        awaited_wrapped = await wrapped(*args, **kwargs)
+        span = tracer.start_span(span_name, kind=SpanKind.CLIENT)

-                # Return original response
-                return response
+        return TracedAsyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)

     return wrapper

-def async_chat_stream(version, environment, application_name,
-    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+def async_embed(version, environment, application_name,
+    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for chat_stream to collect metrics.
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Cohere API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Cohere usage.
-        capture_message_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the chat method to add telemetry.
+    Generates a telemetry wrapper for GenAI embedding function call
     """

     async def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'chat_stream' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'chat_stream' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'chat_stream' method.
-            kwargs: Keyword arguments for the 'chat_stream' method.
-
-        Returns:
-            The response from the original 'chat_stream' method.
+        Wraps the GenAI embedding function call.
         """

-        class TracedAsyncStream:
-            """
-            Wrapper for streaming responses to collect metrics and trace data.
-            Wraps the response to collect message IDs and aggregated response.
-
-            This class implements the '__aiter__' and '__anext__' methods that
-            handle asynchronous streaming responses.
-
-            This class also implements '__aenter__' and '__aexit__' methods that
-            handle asynchronous context management protocol.
-            """
-            def __init__(
-                self,
-                wrapped,
-                span,
-                kwargs,
-                server_address,
-                server_port,
-                **args,
-            ):
-                self.__wrapped__ = wrapped
-                self._span = span
-                # Placeholder for aggregating streaming response
-                self._llmresponse = ""
-                self._response_id = ""
-                self._finish_reason = ""
-                self._input_tokens = ""
-                self._output_tokens = ""
-
-                self._args = args
-                self._kwargs = kwargs
-                self._start_time = time.time()
-                self._end_time = None
-                self._timestamps = []
-                self._ttft = 0
-                self._tbt = 0
-                self._server_address = server_address
-                self._server_port = server_port
-
-            async def __aenter__(self):
-                await self.__wrapped__.__aenter__()
-                return self
-
-            async def __aexit__(self, exc_type, exc_value, traceback):
-                await self.__wrapped__.__aexit__(exc_type, exc_value, traceback)
-
-            def __aiter__(self):
-                return self
-
-            async def __getattr__(self, name):
-                """Delegate attribute access to the wrapped object."""
-                return getattr(await self.__wrapped__, name)
-
-            async def __anext__(self):
-                try:
-                    chunk = await self.__wrapped__.__anext__()
-                    end_time = time.time()
-                    # Record the timestamp for the current chunk
-                    self._timestamps.append(end_time)
-
-                    if len(self._timestamps) == 1:
-                        # Calculate time to first chunk
-                        self._ttft = calculate_ttft(self._timestamps, self._start_time)
-
-                    chunked = response_as_dict(chunk)
-
-                    if chunked.get('type') == 'message-start':
-                        self._response_id = chunked.get('id')
-
-                    if chunked.get('type') == 'content-delta':
-                        content = chunked.get('delta').get('message').get('text')
-                        if content:
-                            self._llmresponse += content
-
-                    if chunked.get('type') == 'message-end':
-                        self._finish_reason = chunked.get('delta').get('finish_reason')
-                        self._input_tokens = chunked.get('delta').get('usage').get('billed_units').get('input_tokens')
-                        self._output_tokens = chunked.get('delta').get('usage').get('billed_units').get('output_tokens')
-
-                    return chunk
-                except StopAsyncIteration:
-                    # Handling exception ensure observability without disrupting operation
-                    try:
-                        self._end_time = time.time()
-                        if len(self._timestamps) > 1:
-                            self._tbt = calculate_tbt(self._timestamps)
-
-                        # Format 'messages' into a single string
-                        message_prompt = self._kwargs.get("messages", "")
-                        formatted_messages = []
-                        for message in message_prompt:
-                            role = message["role"]
-                            content = message["content"]
-
-                            if isinstance(content, list):
-                                content_str_list = []
-                                for item in content:
-                                    if item["type"] == "text":
-                                        content_str_list.append(f'text: {item["text"]}')
-                                    elif (item["type"] == "image_url" and
-                                          not item["image_url"]["url"].startswith("data:")):
-                                        content_str_list.append(f'image_url: {item["image_url"]["url"]}')
-                                content_str = ", ".join(content_str_list)
-                                formatted_messages.append(f"{role}: {content_str}")
-                            else:
-                                formatted_messages.append(f"{role}: {content}")
-                        prompt = "\n".join(formatted_messages)
-
-                        request_model = self._kwargs.get("model", "command-r-plus")
-
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(request_model,
-                            pricing_info, self._input_tokens,
-                            self._output_tokens)
-
-                        # Set Span attributes (OTel Semconv)
-                        self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                        self._span.set_attribute(SemanticConvention.GEN_AI_OPERATION,
-                            SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT)
-                        self._span.set_attribute(SemanticConvention.GEN_AI_SYSTEM,
-                            SemanticConvention.GEN_AI_SYSTEM_COHERE)
-                        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MODEL,
-                            request_model)
-                        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_SEED,
-                            self._kwargs.get("seed", ""))
-                        self._span.set_attribute(SemanticConvention.SERVER_PORT,
-                            self._server_port)
-                        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                            self._kwargs.get("frequency_penalty", 0.0))
-                        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
-                            self._kwargs.get("max_tokens", -1))
-                        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                            self._kwargs.get("presence_penalty", 0.0))
-                        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
-                            self._kwargs.get("stop_sequences", []))
-                        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-                            self._kwargs.get("temperature", 0.3))
-                        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K,
-                            self._kwargs.get("k", 1.0))
-                        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P,
-                            self._kwargs.get("p", 1.0))
-                        self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_FINISH_REASON,
-                            [self._finish_reason])
-                        self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_ID,
-                            self._response_id)
-                        self._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL,
-                            request_model)
-                        self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-                            self._input_tokens)
-                        self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-                            self._output_tokens)
-                        self._span.set_attribute(SemanticConvention.SERVER_ADDRESS,
-                            self._server_address)
-
-                        if isinstance(self._llmresponse, str):
-                            self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                                "text")
-                        else:
-                            self._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE,
-                                "json")
-
-                        # Set Span attributes (Extra)
-                        self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                            environment)
-                        self._span.set_attribute(SERVICE_NAME,
-                            application_name)
-                        self._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM,
-                            True)
-                        self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_TOTAL_TOKENS,
-                            self._input_tokens + self._output_tokens)
-                        self._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST,
-                            cost)
-                        self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT,
-                            self._tbt)
-                        self._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT,
-                            self._ttft)
-                        self._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION,
-                            version)
-                        if capture_message_content:
-                            self._span.add_event(
-                                name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
-                                attributes={
-                                    SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
-                                },
-                            )
-                            self._span.add_event(
-                                name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    SemanticConvention.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
-                                },
-                            )
-                        self._span.set_status(Status(StatusCode.OK))
-
-                        if disable_metrics is False:
-                            attributes = create_metrics_attributes(
-                                service_name=application_name,
-                                deployment_environment=environment,
-                                operation=SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
-                                system=SemanticConvention.GEN_AI_SYSTEM_COHERE,
-                                request_model=request_model,
-                                server_address=self._server_address,
-                                server_port=self._server_port,
-                                response_model=request_model,
-                            )
-
-                            metrics["genai_client_usage_tokens"].record(
-                                self._input_tokens + self._output_tokens, attributes
-                            )
-                            metrics["genai_client_operation_duration"].record(
-                                self._end_time - self._start_time, attributes
-                            )
-                            metrics["genai_server_tbt"].record(
-                                self._tbt, attributes
-                            )
-                            metrics["genai_server_ttft"].record(
-                                self._ttft, attributes
-                            )
-                            metrics["genai_requests"].add(1, attributes)
-                            metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
-                            metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
-                            metrics["genai_cost"].record(cost, attributes)
-
-                    except Exception as e:
-                        handle_exception(self._span, e)
-                        logger.error("Error in trace creation: %s", e)
-                    finally:
-                        self._span.end()
-                    raise
-
         server_address, server_port = set_server_address_and_port(instance, "api.cohere.com", 443)
-        request_model = kwargs.get("model", "command-r-plus")
+        request_model = kwargs.get("model", "embed-english-v3.0")

-        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+        span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"

-        awaited_wrapped = await wrapped(*args, **kwargs)
-        span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
-        return TracedAsyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+            start_time = time.time()
+            response = await wrapped(*args, **kwargs)
+
+            try:
+                response = process_embedding_response(
+                    response=response,
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    start_time=start_time,
+                    span=span,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                    **kwargs
+                )
+
+            except Exception as e:
+                handle_exception(span, e)
+
+            return response

     return wrapper
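
For orientation, here is a minimal sketch of how a factory like the new async_chat might be wired up. The wrapper signature (wrapped, instance, args, kwargs) in the diff matches wrapt's convention, so the sketch uses wrapt.wrap_function_wrapper; the module path, the "AsyncClientV2.chat" attribute, the import path for async_chat, and all argument values are illustrative assumptions, not taken from this diff.

# Hedged sketch, assuming the file above is
# openlit/instrumentation/cohere/async_cohere.py and that the Cohere SDK
# exposes an AsyncClientV2 with a chat coroutine. Only the factory and
# wrapper signatures come from the diff.
import wrapt
from opentelemetry import trace

from openlit.instrumentation.cohere.async_cohere import async_chat

# Returns a no-op tracer unless an OpenTelemetry SDK is configured.
tracer = trace.get_tracer(__name__)

wrapt.wrap_function_wrapper(
    "cohere",              # assumed module exposing the async client
    "AsyncClientV2.chat",  # assumed coroutine method to instrument
    async_chat(
        version="1.34.19",
        environment="production",
        application_name="my-app",
        tracer=tracer,
        pricing_info={},
        capture_message_content=False,
        metrics=None,
        disable_metrics=True,  # metrics=None here, so keep metric recording off
    ),
)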