openlit 1.33.10__py3-none-any.whl → 1.33.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. openlit/__helpers.py +125 -88
  2. openlit/__init__.py +38 -11
  3. openlit/instrumentation/ag2/__init__.py +19 -20
  4. openlit/instrumentation/ag2/ag2.py +134 -69
  5. openlit/instrumentation/ai21/__init__.py +22 -21
  6. openlit/instrumentation/ai21/ai21.py +82 -546
  7. openlit/instrumentation/ai21/async_ai21.py +82 -546
  8. openlit/instrumentation/ai21/utils.py +409 -0
  9. openlit/instrumentation/anthropic/__init__.py +16 -16
  10. openlit/instrumentation/anthropic/anthropic.py +61 -353
  11. openlit/instrumentation/anthropic/async_anthropic.py +62 -354
  12. openlit/instrumentation/anthropic/utils.py +251 -0
  13. openlit/instrumentation/assemblyai/__init__.py +2 -2
  14. openlit/instrumentation/assemblyai/assemblyai.py +3 -3
  15. openlit/instrumentation/astra/__init__.py +25 -25
  16. openlit/instrumentation/astra/astra.py +2 -2
  17. openlit/instrumentation/astra/async_astra.py +2 -2
  18. openlit/instrumentation/azure_ai_inference/__init__.py +5 -5
  19. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +8 -8
  20. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +8 -8
  21. openlit/instrumentation/bedrock/__init__.py +2 -2
  22. openlit/instrumentation/bedrock/bedrock.py +3 -3
  23. openlit/instrumentation/chroma/__init__.py +9 -9
  24. openlit/instrumentation/chroma/chroma.py +2 -2
  25. openlit/instrumentation/cohere/__init__.py +7 -7
  26. openlit/instrumentation/cohere/async_cohere.py +9 -9
  27. openlit/instrumentation/cohere/cohere.py +9 -9
  28. openlit/instrumentation/controlflow/__init__.py +4 -4
  29. openlit/instrumentation/controlflow/controlflow.py +2 -2
  30. openlit/instrumentation/crawl4ai/__init__.py +3 -3
  31. openlit/instrumentation/crawl4ai/async_crawl4ai.py +2 -2
  32. openlit/instrumentation/crawl4ai/crawl4ai.py +2 -2
  33. openlit/instrumentation/crewai/__init__.py +3 -3
  34. openlit/instrumentation/crewai/crewai.py +2 -2
  35. openlit/instrumentation/dynamiq/__init__.py +5 -5
  36. openlit/instrumentation/dynamiq/dynamiq.py +2 -2
  37. openlit/instrumentation/elevenlabs/__init__.py +5 -5
  38. openlit/instrumentation/elevenlabs/async_elevenlabs.py +3 -3
  39. openlit/instrumentation/elevenlabs/elevenlabs.py +3 -3
  40. openlit/instrumentation/embedchain/__init__.py +2 -2
  41. openlit/instrumentation/embedchain/embedchain.py +4 -4
  42. openlit/instrumentation/firecrawl/__init__.py +3 -3
  43. openlit/instrumentation/firecrawl/firecrawl.py +2 -2
  44. openlit/instrumentation/google_ai_studio/__init__.py +3 -3
  45. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +3 -3
  46. openlit/instrumentation/google_ai_studio/google_ai_studio.py +3 -3
  47. openlit/instrumentation/gpt4all/__init__.py +3 -3
  48. openlit/instrumentation/gpt4all/gpt4all.py +7 -7
  49. openlit/instrumentation/groq/__init__.py +3 -3
  50. openlit/instrumentation/groq/async_groq.py +5 -5
  51. openlit/instrumentation/groq/groq.py +5 -5
  52. openlit/instrumentation/haystack/__init__.py +2 -2
  53. openlit/instrumentation/haystack/haystack.py +2 -2
  54. openlit/instrumentation/julep/__init__.py +7 -7
  55. openlit/instrumentation/julep/async_julep.py +3 -3
  56. openlit/instrumentation/julep/julep.py +3 -3
  57. openlit/instrumentation/langchain/__init__.py +2 -2
  58. openlit/instrumentation/langchain/async_langchain.py +13 -9
  59. openlit/instrumentation/langchain/langchain.py +13 -8
  60. openlit/instrumentation/letta/__init__.py +7 -7
  61. openlit/instrumentation/letta/letta.py +5 -5
  62. openlit/instrumentation/litellm/__init__.py +5 -5
  63. openlit/instrumentation/litellm/async_litellm.py +8 -8
  64. openlit/instrumentation/litellm/litellm.py +8 -8
  65. openlit/instrumentation/llamaindex/__init__.py +2 -2
  66. openlit/instrumentation/llamaindex/llamaindex.py +2 -2
  67. openlit/instrumentation/mem0/__init__.py +2 -2
  68. openlit/instrumentation/mem0/mem0.py +2 -2
  69. openlit/instrumentation/milvus/__init__.py +2 -2
  70. openlit/instrumentation/milvus/milvus.py +2 -2
  71. openlit/instrumentation/mistral/__init__.py +7 -7
  72. openlit/instrumentation/mistral/async_mistral.py +10 -10
  73. openlit/instrumentation/mistral/mistral.py +10 -10
  74. openlit/instrumentation/multion/__init__.py +7 -7
  75. openlit/instrumentation/multion/async_multion.py +5 -5
  76. openlit/instrumentation/multion/multion.py +5 -5
  77. openlit/instrumentation/ollama/__init__.py +11 -9
  78. openlit/instrumentation/ollama/async_ollama.py +71 -465
  79. openlit/instrumentation/ollama/ollama.py +71 -465
  80. openlit/instrumentation/ollama/utils.py +332 -0
  81. openlit/instrumentation/openai/__init__.py +11 -11
  82. openlit/instrumentation/openai/async_openai.py +18 -18
  83. openlit/instrumentation/openai/openai.py +18 -18
  84. openlit/instrumentation/phidata/__init__.py +2 -2
  85. openlit/instrumentation/phidata/phidata.py +2 -2
  86. openlit/instrumentation/pinecone/__init__.py +6 -6
  87. openlit/instrumentation/pinecone/pinecone.py +2 -2
  88. openlit/instrumentation/premai/__init__.py +3 -3
  89. openlit/instrumentation/premai/premai.py +7 -7
  90. openlit/instrumentation/qdrant/__init__.py +2 -2
  91. openlit/instrumentation/qdrant/async_qdrant.py +2 -2
  92. openlit/instrumentation/qdrant/qdrant.py +2 -2
  93. openlit/instrumentation/reka/__init__.py +3 -3
  94. openlit/instrumentation/reka/async_reka.py +3 -3
  95. openlit/instrumentation/reka/reka.py +3 -3
  96. openlit/instrumentation/together/__init__.py +5 -5
  97. openlit/instrumentation/together/async_together.py +8 -8
  98. openlit/instrumentation/together/together.py +8 -8
  99. openlit/instrumentation/transformers/__init__.py +2 -2
  100. openlit/instrumentation/transformers/transformers.py +4 -4
  101. openlit/instrumentation/vertexai/__init__.py +9 -9
  102. openlit/instrumentation/vertexai/async_vertexai.py +4 -4
  103. openlit/instrumentation/vertexai/vertexai.py +4 -4
  104. openlit/instrumentation/vllm/__init__.py +2 -2
  105. openlit/instrumentation/vllm/vllm.py +3 -3
  106. openlit/otel/events.py +85 -0
  107. openlit/otel/tracing.py +3 -13
  108. openlit/semcov/__init__.py +13 -1
  109. {openlit-1.33.10.dist-info → openlit-1.33.12.dist-info}/METADATA +2 -2
  110. openlit-1.33.12.dist-info/RECORD +126 -0
  111. openlit-1.33.10.dist-info/RECORD +0 -122
  112. {openlit-1.33.10.dist-info → openlit-1.33.12.dist-info}/LICENSE +0 -0
  113. {openlit-1.33.10.dist-info → openlit-1.33.12.dist-info}/WHEEL +0 -0
openlit/instrumentation/ai21/async_ai21.py

@@ -4,55 +4,39 @@ Module for monitoring AI21 calls.
 
 import logging
 import time
-from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
-    get_chat_model_cost,
     handle_exception,
-    response_as_dict,
-    calculate_ttft,
-    calculate_tbt,
-    create_metrics_attributes,
     set_server_address_and_port,
-    general_tokens
 )
+from openlit.instrumentation.ai21.utils import (
+    process_chunk,
+    process_chat_response,
+    process_streaming_chat_response,
+    process_chat_rag_response
+)
+
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
 def async_chat(version, environment, application_name,
-               tracer, pricing_info, trace_content, metrics, disable_metrics):
+               tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for chat completions to collect metrics.
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the AI21 SDK.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of AI21 usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the chat completions method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     class TracedAsyncStream:
         """
-        Wrapper for streaming responses to collect metrics and trace data.
-        Wraps the response to collect message IDs and aggregated response.
-
-        This class implements the '__aiter__' and '__anext__' methods that
-        handle asynchronous streaming responses.
-
-        This class also implements '__aenter__' and '__aexit__' methods that
-        handle asynchronous context management protocol.
+        Wrapper for streaming responses to collect telemetry.
         """
+
         def __init__(
            self,
            wrapped,
            span,
+           span_name,
            kwargs,
            server_address,
            server_port,
@@ -60,12 +44,14 @@ def async_chat(version, environment, application_name,
        ):
            self.__wrapped__ = wrapped
            self._span = span
+           self._span_name = span_name
            # Placeholder for aggregating streaming response
-           self._llmresponse = ""
-           self._response_id = ""
-           self._finish_reason = ""
+           self._llmresponse = ''
+           self._response_id = ''
+           self._finish_reason = ''
            self._input_tokens = 0
            self._output_tokens = 0
+           self._choices = []
 
            self._args = args
            self._kwargs = kwargs
@@ -94,562 +80,112 @@ def async_chat(version, environment, application_name,
        async def __anext__(self):
            try:
                chunk = await self.__wrapped__.__anext__()
-               end_time = time.time()
-               # Record the timestamp for the current chunk
-               self._timestamps.append(end_time)
-
-               if len(self._timestamps) == 1:
-                   # Calculate time to first chunk
-                   self._ttft = calculate_ttft(self._timestamps, self._start_time)
-
-               chunked = response_as_dict(chunk)
-               if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
-                       'content' in chunked.get('choices')[0].get('delta'))):
-
-                   content = chunked.get('choices')[0].get('delta').get('content')
-                   if content:
-                       self._llmresponse += content
-
-               if chunked.get('usage'):
-                   self._input_tokens = chunked.get('usage').get("prompt_tokens")
-                   self._output_tokens = chunked.get('usage').get("completion_tokens")
-
-               self._response_id = chunked.get('id')
-               self._finish_reason = chunked.get('choices')[0].get('finish_reason')
+               process_chunk(self, chunk)
                return chunk
            except StopAsyncIteration:
                # Handling exception ensure observability without disrupting operation
                try:
-                   self._end_time = time.time()
-                   if len(self._timestamps) > 1:
-                       self._tbt = calculate_tbt(self._timestamps)
-
-                   # Format 'messages' into a single string
-                   message_prompt = self._kwargs.get("messages", "")
-                   formatted_messages = []
-                   for message in message_prompt:
-                       role = message.role
-                       content = message.content
-
-                       if isinstance(content, list):
-                           content_str = ", ".join(
-                               f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                               if "type" in item else f'text: {item["text"]}'
-                               for item in content
-                           )
-                           formatted_messages.append(f"{role}: {content_str}")
-                       else:
-                           formatted_messages.append(f"{role}: {content}")
-                   prompt = "\n".join(formatted_messages)
-
-                   request_model = self._kwargs.get("model", "jamba-1.5-mini")
-
-                   # Calculate cost of the operation
-                   cost = get_chat_model_cost(request_model,
-                                              pricing_info, self._input_tokens,
-                                              self._output_tokens)
-
-                   # Set Span attributes (OTel Semconv)
-                   self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                            SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                            SemanticConvetion.GEN_AI_SYSTEM_AI21)
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                            request_model)
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                            self._kwargs.get("seed", ""))
-                   self._span.set_attribute(SemanticConvetion.SERVER_PORT,
-                                            self._server_port)
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                            self._kwargs.get("frequency_penalty", 0.0))
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                            self._kwargs.get("max_tokens", -1))
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                            self._kwargs.get("presence_penalty", 0.0))
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
-                                            self._kwargs.get("stop", []))
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                            self._kwargs.get("temperature", 0.4))
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                            self._kwargs.get("top_p", 1.0))
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                            [self._finish_reason])
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                            self._response_id)
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
-                                            request_model)
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                            self._input_tokens)
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                            self._output_tokens)
-                   self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
-                                            self._server_address)
-
-                   if isinstance(self._llmresponse, str):
-                       self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
-                                                "text")
-                   else:
-                       self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
-                                                "json")
-
-                   # Set Span attributes (Extra)
-                   self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                            environment)
-                   self._span.set_attribute(SERVICE_NAME,
-                                            application_name)
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                            True)
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                            self._input_tokens + self._output_tokens)
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                            cost)
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
-                                            self._tbt)
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
-                                            self._ttft)
-                   self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
-                                            version)
-                   if trace_content:
-                       self._span.add_event(
-                           name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                           attributes={
-                               SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                           },
-                       )
-                       self._span.add_event(
-                           name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                           attributes={
-                               SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
-                           },
+                   with tracer.start_as_current_span(self._span_name, kind= SpanKind.CLIENT) as self._span:
+                       process_streaming_chat_response(
+                           self,
+                           pricing_info=pricing_info,
+                           environment=environment,
+                           application_name=application_name,
+                           metrics=metrics,
+                           event_provider=event_provider,
+                           capture_message_content=capture_message_content,
+                           disable_metrics=disable_metrics,
+                           version=version
                        )
-                   self._span.set_status(Status(StatusCode.OK))
-
-                   if disable_metrics is False:
-                       attributes = create_metrics_attributes(
-                           service_name=application_name,
-                           deployment_environment=environment,
-                           operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                           system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
-                           request_model=request_model,
-                           server_address=self._server_address,
-                           server_port=self._server_port,
-                           response_model=request_model,
-                       )
-
-                       metrics["genai_client_usage_tokens"].record(
-                           self._input_tokens + self._output_tokens, attributes
-                       )
-                       metrics["genai_client_operation_duration"].record(
-                           self._end_time - self._start_time, attributes
-                       )
-                       metrics["genai_server_tbt"].record(
-                           self._tbt, attributes
-                       )
-                       metrics["genai_server_ttft"].record(
-                           self._ttft, attributes
-                       )
-                       metrics["genai_requests"].add(1, attributes)
-                       metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
-                       metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
-                       metrics["genai_cost"].record(cost, attributes)
-
                except Exception as e:
                    handle_exception(self._span, e)
-                   logger.error("Error in trace creation: %s", e)
-               finally:
-                   self._span.end()
+                   logger.error('Error in trace creation: %s', e)
                raise
 
    async def wrapper(wrapped, instance, args, kwargs):
        """
-       Wraps the 'chat.completions' API call to add telemetry.
-
-       This collects metrics such as execution time, cost, and token usage, and handles errors
-       gracefully, adding details to the trace for observability.
-
-       Args:
-           wrapped: The original 'chat.completions' method to be wrapped.
-           instance: The instance of the class where the original method is defined.
-           args: Positional arguments for the 'chat.completions' method.
-           kwargs: Keyword arguments for the 'chat.completions' method.
-
-       Returns:
-           The response from the original 'chat.completions' method.
+       Wraps the GenAI function call.
        """
 
        # Check if streaming is enabled for the API call
-       streaming = kwargs.get("stream", False)
-       server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
-       request_model = kwargs.get("model", "jamba-1.5-mini")
+       streaming = kwargs.get('stream', False)
+
+       server_address, server_port = set_server_address_and_port(instance, 'api.ai21.com', 443)
+       request_model = kwargs.get('model', 'jamba-1.5-mini')
 
-       span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+       span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}'
 
        # pylint: disable=no-else-return
        if streaming:
            # Special handling for streaming response to accommodate the nature of data flow
            awaited_wrapped = await wrapped(*args, **kwargs)
            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
-
-           return TracedAsyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
+           return TracedAsyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
 
        # Handling for non-streaming responses
        else:
            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
                start_time = time.time()
                response = await wrapped(*args, **kwargs)
-               end_time = time.time()
-
-               response_dict = response_as_dict(response)
-
-               try:
-                   # Format 'messages' into a single string
-                   message_prompt = kwargs.get("messages", "")
-                   formatted_messages = []
-                   for message in message_prompt:
-                       role = message.role
-                       content = message.content
-
-                       if isinstance(content, list):
-                           content_str = ", ".join(
-                               f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                               if "type" in item else f'text: {item["text"]}'
-                               for item in content
-                           )
-                           formatted_messages.append(f"{role}: {content_str}")
-                       else:
-                           formatted_messages.append(f"{role}: {content}")
-                   prompt = "\n".join(formatted_messages)
-
-                   input_tokens = response_dict.get('usage').get('prompt_tokens')
-                   output_tokens = response_dict.get('usage').get('completion_tokens')
-
-                   # Calculate cost of the operation
-                   cost = get_chat_model_cost(request_model,
-                                              pricing_info, input_tokens,
-                                              output_tokens)
-
-                   # Set base span attribues (OTel Semconv)
-                   span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                   span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                      SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                   span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                      SemanticConvetion.GEN_AI_SYSTEM_AI21)
-                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                      request_model)
-                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                      kwargs.get("seed", ""))
-                   span.set_attribute(SemanticConvetion.SERVER_PORT,
-                                      server_port)
-                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                      kwargs.get("frequency_penalty", 0.0))
-                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                      kwargs.get("max_tokens", -1))
-                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                      kwargs.get("presence_penalty", 0.0))
-                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
-                                      kwargs.get("stop", []))
-                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                      kwargs.get("temperature", 0.4))
-                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                      kwargs.get("top_p", 1.0))
-                   span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                      response_dict.get("id"))
-                   span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
-                                      request_model)
-                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                      input_tokens)
-                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                      output_tokens)
-                   span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
-                                      server_address)
-
-                   # Set base span attribues (Extras)
-                   span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                      environment)
-                   span.set_attribute(SERVICE_NAME,
-                                      application_name)
-                   span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                      False)
-                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                      input_tokens + output_tokens)
-                   span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                      cost)
-                   span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
-                                      end_time - start_time)
-                   span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
-                                      version)
-                   if trace_content:
-                       span.add_event(
-                           name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                           attributes={
-                               SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                           },
-                       )
-
-                   for i in range(kwargs.get('n',1)):
-                       span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                          [response_dict.get('choices')[i].get('finish_reason')])
-                       if trace_content:
-                           span.add_event(
-                               name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                               attributes={
-                                   # pylint: disable=line-too-long
-                                   SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
-                               },
-                           )
-                       if kwargs.get('tools'):
-                           span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
-                                              str(response_dict.get('choices')[i].get('message').get('tool_calls')))
-
-                       if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
-                           span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
-                                              "text")
-                       elif response_dict.get('choices')[i].get('message').get('content') is not None:
-                           span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
-                                              "json")
-
-                   span.set_status(Status(StatusCode.OK))
-
-                   if disable_metrics is False:
-                       attributes = create_metrics_attributes(
-                           service_name=application_name,
-                           deployment_environment=environment,
-                           operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                           system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
-                           request_model=request_model,
-                           server_address=server_address,
-                           server_port=server_port,
-                           response_model=request_model,
-                       )
-
-                       metrics["genai_client_usage_tokens"].record(
-                           input_tokens + output_tokens, attributes
-                       )
-                       metrics["genai_client_operation_duration"].record(
-                           end_time - start_time, attributes
-                       )
-                       metrics["genai_server_ttft"].record(
-                           end_time - start_time, attributes
-                       )
-                       metrics["genai_requests"].add(1, attributes)
-                       metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                       metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                       metrics["genai_cost"].record(cost, attributes)
-
-                   # Return original response
-                   return response
-
-               except Exception as e:
-                   handle_exception(span, e)
-                   logger.error("Error in trace creation: %s", e)
-
-                   # Return original response
-                   return response
+               response = process_chat_response(
+                   response=response,
+                   request_model=request_model,
+                   pricing_info=pricing_info,
+                   server_port=server_port,
+                   server_address=server_address,
+                   environment=environment,
+                   application_name=application_name,
+                   metrics=metrics,
+                   event_provider=event_provider,
+                   start_time=start_time,
+                   span=span,
+                   capture_message_content=capture_message_content,
+                   disable_metrics=disable_metrics,
+                   version=version,
+                   **kwargs
+               )
+
+               return response
 
    return wrapper
 
 def async_chat_rag(version, environment, application_name,
-                  tracer, pricing_info, trace_content, metrics, disable_metrics):
+                  tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
    """
-   Generates a telemetry wrapper for chat completions to collect metrics.
-
-   Args:
-       version: Version of the monitoring package.
-       environment: Deployment environment (e.g., production, staging).
-       application_name: Name of the application using the AI21 SDK.
-       tracer: OpenTelemetry tracer for creating spans.
-       pricing_info: Information used for calculating the cost of AI21 usage.
-       trace_content: Flag indicating whether to trace the actual content.
-
-   Returns:
-       A function that wraps the chat completions method to add telemetry.
+   Generates a telemetry wrapper for GenAI function call
    """
 
    async def wrapper(wrapped, instance, args, kwargs):
        """
-       Wraps the 'chat.completions' API call to add telemetry.
-
-       This collects metrics such as execution time, cost, and token usage, and handles errors
-       gracefully, adding details to the trace for observability.
-
-       Args:
-           wrapped: The original 'chat.completions' method to be wrapped.
-           instance: The instance of the class where the original method is defined.
-           args: Positional arguments for the 'chat.completions' method.
-           kwargs: Keyword arguments for the 'chat.completions' method.
-
-       Returns:
-           The response from the original 'chat.completions' method.
+       Wraps the GenAI function call.
        """
 
-       server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
-       request_model = kwargs.get("model", "jamba-1.5-mini")
+       server_address, server_port = set_server_address_and_port(instance, 'api.ai21.com', 443)
+       request_model = kwargs.get('model', 'jamba-1.5-mini')
 
-       span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
+       span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}'
 
        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
            start_time = time.time()
            response = await wrapped(*args, **kwargs)
-           end_time = time.time()
-
-           response_dict = response_as_dict(response)
-
-           try:
-               # Format 'messages' into a single string
-               message_prompt = kwargs.get("messages", "")
-               formatted_messages = []
-               for message in message_prompt:
-                   role = message.role
-                   content = message.content
-
-                   if isinstance(content, list):
-                       content_str = ", ".join(
-                           f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                           if "type" in item else f'text: {item["text"]}'
-                           for item in content
-                       )
-                       formatted_messages.append(f"{role}: {content_str}")
-                   else:
-                       formatted_messages.append(f"{role}: {content}")
-               prompt = "\n".join(formatted_messages)
-
-               input_tokens = general_tokens(prompt)
-
-               # Set base span attribues (OTel Semconv)
-               span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-               span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                  SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-               span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                  SemanticConvetion.GEN_AI_SYSTEM_AI21)
-               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                  request_model)
-               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                  kwargs.get("seed", ""))
-               span.set_attribute(SemanticConvetion.SERVER_PORT,
-                                  server_port)
-               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                  kwargs.get("frequency_penalty", 0.0))
-               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                  kwargs.get("max_tokens", -1))
-               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                  kwargs.get("presence_penalty", 0.0))
-               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
-                                  kwargs.get("stop", []))
-               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                  kwargs.get("temperature", 0.4))
-               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                  kwargs.get("top_p", 1.0))
-               span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                  response_dict.get("id"))
-               span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
-                                  request_model)
-               span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                  input_tokens)
-               span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
-                                  server_address)
-
-               # Set base span attribues (Extras)
-               span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                  environment)
-               span.set_attribute(SERVICE_NAME,
-                                  application_name)
-               span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                  False)
-               span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
-                                  end_time - start_time)
-               span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
-                                  version)
-               span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_SEGMENTS,
-                                  kwargs.get("max_segments", -1))
-               span.set_attribute(SemanticConvetion.GEN_AI_RAG_STRATEGY,
-                                  kwargs.get("retrieval_strategy", "segments"))
-               span.set_attribute(SemanticConvetion.GEN_AI_RAG_SIMILARITY_THRESHOLD,
-                                  kwargs.get("retrieval_similarity_threshold", -1))
-               span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_NEIGHBORS,
-                                  kwargs.get("max_neighbors", -1))
-               span.set_attribute(SemanticConvetion.GEN_AI_RAG_FILE_IDS,
-                                  str(kwargs.get("file_ids", "")))
-               span.set_attribute(SemanticConvetion.GEN_AI_RAG_DOCUMENTS_PATH,
-                                  kwargs.get("path", ""))
-               if trace_content:
-                   span.add_event(
-                       name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                       attributes={
-                           SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                       },
-                   )
-
-               output_tokens = 0
-               for i in range(kwargs.get('n',1)):
-                   output_tokens += general_tokens(response_dict.get('choices')[i].get('content'))
-
-                   if trace_content:
-                       span.add_event(
-                           name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                           attributes={
-                               # pylint: disable=line-too-long
-                               SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('content')),
-                           },
-                       )
-                   if kwargs.get('tools'):
-                       span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
-                                          str(response_dict.get('choices')[i].get('message').get('tool_calls')))
-
-                   if isinstance(response_dict.get('choices')[i].get('content'), str):
-                       span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
-                                          "text")
-                   elif response_dict.get('choices')[i].get('content') is not None:
-                       span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
-                                          "json")
-
-               # Calculate cost of the operation
-               cost = get_chat_model_cost(request_model,
-                                          pricing_info, input_tokens,
-                                          output_tokens)
-               span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                  cost)
-               span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                  output_tokens)
-               span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                  input_tokens + output_tokens)
-
-               span.set_status(Status(StatusCode.OK))
-
-               if disable_metrics is False:
-                   attributes = create_metrics_attributes(
-                       service_name=application_name,
-                       deployment_environment=environment,
-                       operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                       system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
-                       request_model=request_model,
-                       server_address=server_address,
-                       server_port=server_port,
-                       response_model=request_model,
-                   )
-
-                   metrics["genai_client_usage_tokens"].record(
-                       input_tokens + output_tokens, attributes
-                   )
-                   metrics["genai_client_operation_duration"].record(
-                       end_time - start_time, attributes
-                   )
-                   metrics["genai_server_ttft"].record(
-                       end_time - start_time, attributes
-                   )
-                   metrics["genai_requests"].add(1, attributes)
-                   metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                   metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                   metrics["genai_cost"].record(cost, attributes)
-
-               # Return original response
-               return response
-
-           except Exception as e:
-               handle_exception(span, e)
-               logger.error("Error in trace creation: %s", e)
-
-               # Return original response
-               return response
+           response = process_chat_rag_response(
+               response=response,
+               request_model=request_model,
+               pricing_info=pricing_info,
+               server_port=server_port,
+               server_address=server_address,
+               environment=environment,
+               application_name=application_name,
+               metrics=metrics,
+               event_provider=event_provider,
+               start_time=start_time,
+               span=span,
+               capture_message_content=capture_message_content,
+               disable_metrics=disable_metrics,
+               version=version,
+               **kwargs
+           )
+
+           return response
 
    return wrapper