openlit 1.33.10__py3-none-any.whl → 1.33.11__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (112)
  1. openlit/__helpers.py +73 -0
  2. openlit/__init__.py +38 -11
  3. openlit/instrumentation/ag2/__init__.py +9 -10
  4. openlit/instrumentation/ag2/ag2.py +133 -68
  5. openlit/instrumentation/ai21/__init__.py +6 -5
  6. openlit/instrumentation/ai21/ai21.py +71 -534
  7. openlit/instrumentation/ai21/async_ai21.py +71 -534
  8. openlit/instrumentation/ai21/utils.py +407 -0
  9. openlit/instrumentation/anthropic/__init__.py +3 -3
  10. openlit/instrumentation/anthropic/anthropic.py +4 -4
  11. openlit/instrumentation/anthropic/async_anthropic.py +4 -4
  12. openlit/instrumentation/assemblyai/__init__.py +2 -2
  13. openlit/instrumentation/assemblyai/assemblyai.py +3 -3
  14. openlit/instrumentation/astra/__init__.py +25 -25
  15. openlit/instrumentation/astra/astra.py +2 -2
  16. openlit/instrumentation/astra/async_astra.py +2 -2
  17. openlit/instrumentation/azure_ai_inference/__init__.py +5 -5
  18. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +8 -8
  19. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +8 -8
  20. openlit/instrumentation/bedrock/__init__.py +2 -2
  21. openlit/instrumentation/bedrock/bedrock.py +3 -3
  22. openlit/instrumentation/chroma/__init__.py +9 -9
  23. openlit/instrumentation/chroma/chroma.py +2 -2
  24. openlit/instrumentation/cohere/__init__.py +7 -7
  25. openlit/instrumentation/cohere/async_cohere.py +9 -9
  26. openlit/instrumentation/cohere/cohere.py +9 -9
  27. openlit/instrumentation/controlflow/__init__.py +4 -4
  28. openlit/instrumentation/controlflow/controlflow.py +2 -2
  29. openlit/instrumentation/crawl4ai/__init__.py +3 -3
  30. openlit/instrumentation/crawl4ai/async_crawl4ai.py +2 -2
  31. openlit/instrumentation/crawl4ai/crawl4ai.py +2 -2
  32. openlit/instrumentation/crewai/__init__.py +3 -3
  33. openlit/instrumentation/crewai/crewai.py +2 -2
  34. openlit/instrumentation/dynamiq/__init__.py +5 -5
  35. openlit/instrumentation/dynamiq/dynamiq.py +2 -2
  36. openlit/instrumentation/elevenlabs/__init__.py +5 -5
  37. openlit/instrumentation/elevenlabs/async_elevenlabs.py +3 -3
  38. openlit/instrumentation/elevenlabs/elevenlabs.py +3 -3
  39. openlit/instrumentation/embedchain/__init__.py +2 -2
  40. openlit/instrumentation/embedchain/embedchain.py +4 -4
  41. openlit/instrumentation/firecrawl/__init__.py +3 -3
  42. openlit/instrumentation/firecrawl/firecrawl.py +2 -2
  43. openlit/instrumentation/google_ai_studio/__init__.py +3 -3
  44. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +3 -3
  45. openlit/instrumentation/google_ai_studio/google_ai_studio.py +3 -3
  46. openlit/instrumentation/gpt4all/__init__.py +3 -3
  47. openlit/instrumentation/gpt4all/gpt4all.py +7 -7
  48. openlit/instrumentation/groq/__init__.py +3 -3
  49. openlit/instrumentation/groq/async_groq.py +5 -5
  50. openlit/instrumentation/groq/groq.py +5 -5
  51. openlit/instrumentation/haystack/__init__.py +2 -2
  52. openlit/instrumentation/haystack/haystack.py +2 -2
  53. openlit/instrumentation/julep/__init__.py +7 -7
  54. openlit/instrumentation/julep/async_julep.py +3 -3
  55. openlit/instrumentation/julep/julep.py +3 -3
  56. openlit/instrumentation/langchain/__init__.py +2 -2
  57. openlit/instrumentation/langchain/async_langchain.py +13 -9
  58. openlit/instrumentation/langchain/langchain.py +13 -8
  59. openlit/instrumentation/letta/__init__.py +7 -7
  60. openlit/instrumentation/letta/letta.py +5 -5
  61. openlit/instrumentation/litellm/__init__.py +5 -5
  62. openlit/instrumentation/litellm/async_litellm.py +8 -8
  63. openlit/instrumentation/litellm/litellm.py +8 -8
  64. openlit/instrumentation/llamaindex/__init__.py +2 -2
  65. openlit/instrumentation/llamaindex/llamaindex.py +2 -2
  66. openlit/instrumentation/mem0/__init__.py +2 -2
  67. openlit/instrumentation/mem0/mem0.py +2 -2
  68. openlit/instrumentation/milvus/__init__.py +2 -2
  69. openlit/instrumentation/milvus/milvus.py +2 -2
  70. openlit/instrumentation/mistral/__init__.py +7 -7
  71. openlit/instrumentation/mistral/async_mistral.py +10 -10
  72. openlit/instrumentation/mistral/mistral.py +10 -10
  73. openlit/instrumentation/multion/__init__.py +7 -7
  74. openlit/instrumentation/multion/async_multion.py +5 -5
  75. openlit/instrumentation/multion/multion.py +5 -5
  76. openlit/instrumentation/ollama/__init__.py +11 -9
  77. openlit/instrumentation/ollama/async_ollama.py +71 -465
  78. openlit/instrumentation/ollama/ollama.py +71 -465
  79. openlit/instrumentation/ollama/utils.py +333 -0
  80. openlit/instrumentation/openai/__init__.py +11 -11
  81. openlit/instrumentation/openai/async_openai.py +18 -18
  82. openlit/instrumentation/openai/openai.py +18 -18
  83. openlit/instrumentation/phidata/__init__.py +2 -2
  84. openlit/instrumentation/phidata/phidata.py +2 -2
  85. openlit/instrumentation/pinecone/__init__.py +6 -6
  86. openlit/instrumentation/pinecone/pinecone.py +2 -2
  87. openlit/instrumentation/premai/__init__.py +3 -3
  88. openlit/instrumentation/premai/premai.py +7 -7
  89. openlit/instrumentation/qdrant/__init__.py +2 -2
  90. openlit/instrumentation/qdrant/async_qdrant.py +2 -2
  91. openlit/instrumentation/qdrant/qdrant.py +2 -2
  92. openlit/instrumentation/reka/__init__.py +3 -3
  93. openlit/instrumentation/reka/async_reka.py +3 -3
  94. openlit/instrumentation/reka/reka.py +3 -3
  95. openlit/instrumentation/together/__init__.py +5 -5
  96. openlit/instrumentation/together/async_together.py +8 -8
  97. openlit/instrumentation/together/together.py +8 -8
  98. openlit/instrumentation/transformers/__init__.py +2 -2
  99. openlit/instrumentation/transformers/transformers.py +4 -4
  100. openlit/instrumentation/vertexai/__init__.py +9 -9
  101. openlit/instrumentation/vertexai/async_vertexai.py +4 -4
  102. openlit/instrumentation/vertexai/vertexai.py +4 -4
  103. openlit/instrumentation/vllm/__init__.py +2 -2
  104. openlit/instrumentation/vllm/vllm.py +3 -3
  105. openlit/otel/events.py +85 -0
  106. openlit/otel/tracing.py +3 -13
  107. openlit/semcov/__init__.py +13 -1
  108. {openlit-1.33.10.dist-info → openlit-1.33.11.dist-info}/METADATA +2 -2
  109. openlit-1.33.11.dist-info/RECORD +125 -0
  110. openlit-1.33.10.dist-info/RECORD +0 -122
  111. {openlit-1.33.10.dist-info → openlit-1.33.11.dist-info}/LICENSE +0 -0
  112. {openlit-1.33.10.dist-info → openlit-1.33.11.dist-info}/WHEEL +0 -0
openlit/instrumentation/ai21/async_ai21.py
@@ -4,55 +4,40 @@ Module for monitoring AI21 calls.
 
 import logging
 import time
-from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
-    get_chat_model_cost,
     handle_exception,
     response_as_dict,
-    calculate_ttft,
-    calculate_tbt,
-    create_metrics_attributes,
     set_server_address_and_port,
-    general_tokens
 )
+from openlit.instrumentation.ai21.utils import (
+    process_chunk,
+    process_chat_response,
+    process_streaming_chat_response,
+    process_chat_rag_response
+)
+
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
 def async_chat(version, environment, application_name,
-    tracer, pricing_info, trace_content, metrics, disable_metrics):
+    tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for chat completions to collect metrics.
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the AI21 SDK.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of AI21 usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the chat completions method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     class TracedAsyncStream:
         """
-        Wrapper for streaming responses to collect metrics and trace data.
-        Wraps the response to collect message IDs and aggregated response.
-
-        This class implements the '__aiter__' and '__anext__' methods that
-        handle asynchronous streaming responses.
-
-        This class also implements '__aenter__' and '__aexit__' methods that
-        handle asynchronous context management protocol.
+        Wrapper for streaming responses to collect telemetry.
         """
+
         def __init__(
                 self,
                 wrapped,
                 span,
+                span_name,
                 kwargs,
                 server_address,
                 server_port,
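
The logic the deleted imports supported now lives in the new openlit/instrumentation/ai21/utils.py (file 8 in the list above, +407 lines). That module is not shown in this section, but the import block and the call sites below imply roughly the following public surface. These are hypothetical stubs with signatures inferred from the call sites in async_ai21.py, not copied from the actual module:

    # Hypothetical stubs for the helpers imported above; the signatures are
    # inferred from their call sites in this file and may differ from utils.py.

    def process_chunk(scope, chunk):
        """Fold one streaming chunk into the wrapper's aggregation fields."""

    def process_chat_response(response, request_model, pricing_info, server_port,
                              server_address, environment, application_name,
                              metrics, event_provider, start_time, span,
                              capture_message_content, disable_metrics, version,
                              **kwargs):
        """Record span attributes, events, and metrics for a non-streaming
        chat call, then return the response."""

    def process_streaming_chat_response(scope, pricing_info, environment,
                                        application_name, metrics, event_provider,
                                        capture_message_content, disable_metrics,
                                        version):
        """Finalize telemetry once an async stream is exhausted."""

    def process_chat_rag_response(response, request_model, pricing_info,
                                  server_port, server_address, environment,
                                  application_name, metrics, event_provider,
                                  start_time, span, capture_message_content,
                                  disable_metrics, version, **kwargs):
        """Record telemetry for AI21 conversational-RAG responses."""
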
@@ -60,12 +45,14 @@ def async_chat(version, environment, application_name,
         ):
             self.__wrapped__ = wrapped
             self._span = span
+            self._span_name = span_name
             # Placeholder for aggregating streaming response
             self._llmresponse = ""
             self._response_id = ""
             self._finish_reason = ""
             self._input_tokens = 0
             self._output_tokens = 0
+            self._choices = []
 
             self._args = args
             self._kwargs = kwargs
@@ -94,192 +81,36 @@ def async_chat(version, environment, application_name,
         async def __anext__(self):
             try:
                 chunk = await self.__wrapped__.__anext__()
-                end_time = time.time()
-                # Record the timestamp for the current chunk
-                self._timestamps.append(end_time)
-
-                if len(self._timestamps) == 1:
-                    # Calculate time to first chunk
-                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
-
-                chunked = response_as_dict(chunk)
-                if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
-                    'content' in chunked.get('choices')[0].get('delta'))):
-
-                    content = chunked.get('choices')[0].get('delta').get('content')
-                    if content:
-                        self._llmresponse += content
-
-                if chunked.get('usage'):
-                    self._input_tokens = chunked.get('usage').get("prompt_tokens")
-                    self._output_tokens = chunked.get('usage').get("completion_tokens")
-
-                self._response_id = chunked.get('id')
-                self._finish_reason = chunked.get('choices')[0].get('finish_reason')
+                process_chunk(self, chunk)
                 return chunk
             except StopAsyncIteration:
                 # Handling exception ensure observability without disrupting operation
                 try:
-                    self._end_time = time.time()
-                    if len(self._timestamps) > 1:
-                        self._tbt = calculate_tbt(self._timestamps)
-
-                    # Format 'messages' into a single string
-                    message_prompt = self._kwargs.get("messages", "")
-                    formatted_messages = []
-                    for message in message_prompt:
-                        role = message.role
-                        content = message.content
-
-                        if isinstance(content, list):
-                            content_str = ", ".join(
-                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                if "type" in item else f'text: {item["text"]}'
-                                for item in content
-                            )
-                            formatted_messages.append(f"{role}: {content_str}")
-                        else:
-                            formatted_messages.append(f"{role}: {content}")
-                    prompt = "\n".join(formatted_messages)
-
-                    request_model = self._kwargs.get("model", "jamba-1.5-mini")
-
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(request_model,
-                                               pricing_info, self._input_tokens,
-                                               self._output_tokens)
-
-                    # Set Span attributes (OTel Semconv)
-                    self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                             SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                             SemanticConvetion.GEN_AI_SYSTEM_AI21)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                             request_model)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                             self._kwargs.get("seed", ""))
-                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
-                                             self._server_port)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                             self._kwargs.get("frequency_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                             self._kwargs.get("max_tokens", -1))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                             self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
-                                             self._kwargs.get("stop", []))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                             self._kwargs.get("temperature", 0.4))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                             self._kwargs.get("top_p", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                             [self._finish_reason])
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                             self._response_id)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
-                                             request_model)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                             self._input_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                             self._output_tokens)
-                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
-                                             self._server_address)
-
-                    if isinstance(self._llmresponse, str):
-                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
-                                                 "text")
-                    else:
-                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
-                                                 "json")
-
-                    # Set Span attributes (Extra)
-                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                             environment)
-                    self._span.set_attribute(SERVICE_NAME,
-                                             application_name)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                             True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                             self._input_tokens + self._output_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                             cost)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
-                                             self._tbt)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
-                                             self._ttft)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
-                                             version)
-                    if trace_content:
-                        self._span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-                        self._span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
-                            },
+                    with tracer.start_as_current_span(self._span_name, kind= SpanKind.CLIENT) as self._span:
+                        process_streaming_chat_response(
+                            self,
+                            pricing_info=pricing_info,
+                            environment=environment,
+                            application_name=application_name,
+                            metrics=metrics,
+                            event_provider=event_provider,
+                            capture_message_content=capture_message_content,
+                            disable_metrics=disable_metrics,
+                            version=version
                         )
-                    self._span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = create_metrics_attributes(
-                            service_name=application_name,
-                            deployment_environment=environment,
-                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
-                            request_model=request_model,
-                            server_address=self._server_address,
-                            server_port=self._server_port,
-                            response_model=request_model,
-                        )
-
-                        metrics["genai_client_usage_tokens"].record(
-                            self._input_tokens + self._output_tokens, attributes
-                        )
-                        metrics["genai_client_operation_duration"].record(
-                            self._end_time - self._start_time, attributes
-                        )
-                        metrics["genai_server_tbt"].record(
-                            self._tbt, attributes
-                        )
-                        metrics["genai_server_ttft"].record(
-                            self._ttft, attributes
-                        )
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_completion_tokens"].add(self._output_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(self._input_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
-
                 except Exception as e:
                     handle_exception(self._span, e)
                     logger.error("Error in trace creation: %s", e)
-                finally:
-                    self._span.end()
                 raise
 
     async def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'chat.completions' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'chat.completions' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'chat.completions' method.
-            kwargs: Keyword arguments for the 'chat.completions' method.
-
-        Returns:
-            The response from the original 'chat.completions' method.
+        Wraps the GenAI function call.
        """
 
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
+
         server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
         request_model = kwargs.get("model", "jamba-1.5-mini")
 
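
For orientation, the inline chunk handling deleted above is exactly what process_chunk must now cover. A minimal sketch reconstructed from that removed code, assuming scope is the TracedAsyncStream instance with the fields shown in this diff (the shipped utils.py may differ):

    import time
    from openlit.__helpers import calculate_ttft, response_as_dict

    def process_chunk(scope, chunk):
        # Record the arrival timestamp of this chunk
        scope._timestamps.append(time.time())
        if len(scope._timestamps) == 1:
            # First chunk: compute time-to-first-token from the stream start
            scope._ttft = calculate_ttft(scope._timestamps, scope._start_time)

        chunked = response_as_dict(chunk)
        choices = chunked.get('choices') or []

        # Aggregate streamed delta content into one response string
        if choices and 'content' in choices[0].get('delta', {}):
            content = choices[0]['delta']['content']
            if content:
                scope._llmresponse += content

        # Token usage, id and finish_reason are taken from whichever chunk
        # carries them, mirroring the removed inline logic
        if chunked.get('usage'):
            scope._input_tokens = chunked['usage'].get('prompt_tokens')
            scope._output_tokens = chunked['usage'].get('completion_tokens')
        scope._response_id = chunked.get('id')
        if choices:
            scope._finish_reason = choices[0].get('finish_reason')

Note the lifecycle change alongside it: the old code ended the long-lived span in a finally: block, while the new code stores span_name and opens a fresh span via tracer.start_as_current_span(...) only once the stream is exhausted.
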
@@ -290,197 +121,44 @@ def async_chat(version, environment, application_name,
             # Special handling for streaming response to accommodate the nature of data flow
             awaited_wrapped = await wrapped(*args, **kwargs)
             span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
-
-            return TracedAsyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
+            return TracedAsyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port)
 
         # Handling for non-streaming responses
         else:
             with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
                 start_time = time.time()
                 response = await wrapped(*args, **kwargs)
-                end_time = time.time()
-
-                response_dict = response_as_dict(response)
-
-                try:
-                    # Format 'messages' into a single string
-                    message_prompt = kwargs.get("messages", "")
-                    formatted_messages = []
-                    for message in message_prompt:
-                        role = message.role
-                        content = message.content
-
-                        if isinstance(content, list):
-                            content_str = ", ".join(
-                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                if "type" in item else f'text: {item["text"]}'
-                                for item in content
-                            )
-                            formatted_messages.append(f"{role}: {content_str}")
-                        else:
-                            formatted_messages.append(f"{role}: {content}")
-                    prompt = "\n".join(formatted_messages)
-
-                    input_tokens = response_dict.get('usage').get('prompt_tokens')
-                    output_tokens = response_dict.get('usage').get('completion_tokens')
-
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(request_model,
-                                               pricing_info, input_tokens,
-                                               output_tokens)
-
-                    # Set base span attribues (OTel Semconv)
-                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                       SemanticConvetion.GEN_AI_SYSTEM_AI21)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       request_model)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                       kwargs.get("seed", ""))
-                    span.set_attribute(SemanticConvetion.SERVER_PORT,
-                                       server_port)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                       kwargs.get("frequency_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                       kwargs.get("max_tokens", -1))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                       kwargs.get("presence_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
-                                       kwargs.get("stop", []))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                       kwargs.get("temperature", 0.4))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                       kwargs.get("top_p", 1.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                       response_dict.get("id"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
-                                       request_model)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                       input_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                       output_tokens)
-                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
-                                       server_address)
-
-                    # Set base span attribues (Extras)
-                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                       environment)
-                    span.set_attribute(SERVICE_NAME,
-                                       application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                       False)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       input_tokens + output_tokens)
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                       cost)
-                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
-                                       end_time - start_time)
-                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
-                                       version)
-                    if trace_content:
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                            },
-                        )
-
-                    for i in range(kwargs.get('n',1)):
-                        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                           [response_dict.get('choices')[i].get('finish_reason')])
-                        if trace_content:
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    # pylint: disable=line-too-long
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
-                                },
-                            )
-                        if kwargs.get('tools'):
-                            span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
-                                               str(response_dict.get('choices')[i].get('message').get('tool_calls')))
-
-                        if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
-                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
-                                               "text")
-                        elif response_dict.get('choices')[i].get('message').get('content') is not None:
-                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
-                                               "json")
-
-                    span.set_status(Status(StatusCode.OK))
-
-                    if disable_metrics is False:
-                        attributes = create_metrics_attributes(
-                            service_name=application_name,
-                            deployment_environment=environment,
-                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
-                            request_model=request_model,
-                            server_address=server_address,
-                            server_port=server_port,
-                            response_model=request_model,
-                        )
-
-                        metrics["genai_client_usage_tokens"].record(
-                            input_tokens + output_tokens, attributes
-                        )
-                        metrics["genai_client_operation_duration"].record(
-                            end_time - start_time, attributes
-                        )
-                        metrics["genai_server_ttft"].record(
-                            end_time - start_time, attributes
-                        )
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
-
-                    # Return original response
-                    return response
-
-                except Exception as e:
-                    handle_exception(span, e)
-                    logger.error("Error in trace creation: %s", e)
-
-                    # Return original response
-                    return response
+                response = process_chat_response(
+                    response=response_as_dict(response),
+                    request_model=request_model,
+                    pricing_info=pricing_info,
+                    server_port=server_port,
+                    server_address=server_address,
+                    environment=environment,
+                    application_name=application_name,
+                    metrics=metrics,
+                    event_provider=event_provider,
+                    start_time=start_time,
+                    span=span,
+                    capture_message_content=capture_message_content,
+                    disable_metrics=disable_metrics,
+                    version=version,
+                    **kwargs
+                )
+
+                return response
 
     return wrapper
 
 def async_chat_rag(version, environment, application_name,
-    tracer, pricing_info, trace_content, metrics, disable_metrics):
+    tracer, event_provider, pricing_info, capture_message_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for chat completions to collect metrics.
-
-    Args:
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the AI21 SDK.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of AI21 usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the chat completions method to add telemetry.
+    Generates a telemetry wrapper for GenAI function call
     """
 
     async def wrapper(wrapped, instance, args, kwargs):
         """
-        Wraps the 'chat.completions' API call to add telemetry.
-
-        This collects metrics such as execution time, cost, and token usage, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'chat.completions' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the 'chat.completions' method.
-            kwargs: Keyword arguments for the 'chat.completions' method.
-
-        Returns:
-            The response from the original 'chat.completions' method.
+        Wraps the GenAI function call.
         """
 
         server_address, server_port = set_server_address_and_port(instance, "api.ai21.com", 443)
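
The non-streaming path is now a thin shell: time the wrapped call, convert the response to a dict, and hand everything (including the original **kwargs) to process_chat_response. From the application's side nothing changes. A hypothetical call that would flow through this wrapper, using the AI21 SDK's async client as I understand it (illustrative, not taken from this diff):

    import asyncio
    from ai21 import AsyncAI21Client
    from ai21.models.chat import ChatMessage

    async def main():
        # Once openlit has wrapped the SDK, this call is traced transparently.
        client = AsyncAI21Client()
        response = await client.chat.completions.create(
            model="jamba-1.5-mini",  # the default model name assumed in this diff
            messages=[ChatMessage(role="user", content="Hello")],
            stream=False,  # True would route through TracedAsyncStream instead
        )
        print(response)

    asyncio.run(main())
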
@@ -491,165 +169,24 @@ def async_chat_rag(version, environment, application_name,
         with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
             start_time = time.time()
             response = await wrapped(*args, **kwargs)
-            end_time = time.time()
-
-            response_dict = response_as_dict(response)
-
-            try:
-                # Format 'messages' into a single string
-                message_prompt = kwargs.get("messages", "")
-                formatted_messages = []
-                for message in message_prompt:
-                    role = message.role
-                    content = message.content
-
-                    if isinstance(content, list):
-                        content_str = ", ".join(
-                            f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                            if "type" in item else f'text: {item["text"]}'
-                            for item in content
-                        )
-                        formatted_messages.append(f"{role}: {content_str}")
-                    else:
-                        formatted_messages.append(f"{role}: {content}")
-                prompt = "\n".join(formatted_messages)
-
-                input_tokens = general_tokens(prompt)
-
-                # Set base span attribues (OTel Semconv)
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
-                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                   SemanticConvetion.GEN_AI_SYSTEM_AI21)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   request_model)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                   kwargs.get("seed", ""))
-                span.set_attribute(SemanticConvetion.SERVER_PORT,
-                                   server_port)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                   kwargs.get("frequency_penalty", 0.0))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                   kwargs.get("max_tokens", -1))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                   kwargs.get("presence_penalty", 0.0))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
-                                   kwargs.get("stop", []))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                   kwargs.get("temperature", 0.4))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                   kwargs.get("top_p", 1.0))
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                   response_dict.get("id"))
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
-                                   request_model)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                   input_tokens)
-                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
-                                   server_address)
-
-                # Set base span attribues (Extras)
-                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
-                                   environment)
-                span.set_attribute(SERVICE_NAME,
-                                   application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
-                                   False)
-                span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
-                                   end_time - start_time)
-                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
-                                   version)
-                span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_SEGMENTS,
-                                   kwargs.get("max_segments", -1))
-                span.set_attribute(SemanticConvetion.GEN_AI_RAG_STRATEGY,
-                                   kwargs.get("retrieval_strategy", "segments"))
-                span.set_attribute(SemanticConvetion.GEN_AI_RAG_SIMILARITY_THRESHOLD,
-                                   kwargs.get("retrieval_similarity_threshold", -1))
-                span.set_attribute(SemanticConvetion.GEN_AI_RAG_MAX_NEIGHBORS,
-                                   kwargs.get("max_neighbors", -1))
-                span.set_attribute(SemanticConvetion.GEN_AI_RAG_FILE_IDS,
-                                   str(kwargs.get("file_ids", "")))
-                span.set_attribute(SemanticConvetion.GEN_AI_RAG_DOCUMENTS_PATH,
-                                   kwargs.get("path", ""))
-                if trace_content:
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
-                        },
-                    )
-
-                output_tokens = 0
-                for i in range(kwargs.get('n',1)):
-                    output_tokens += general_tokens(response_dict.get('choices')[i].get('content'))
-
-                    if trace_content:
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                # pylint: disable=line-too-long
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('content')),
-                            },
-                        )
-                    if kwargs.get('tools'):
-                        span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
-                                           str(response_dict.get('choices')[i].get('message').get('tool_calls')))
-
-                    if isinstance(response_dict.get('choices')[i].get('content'), str):
-                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
-                                           "text")
-                    elif response_dict.get('choices')[i].get('content') is not None:
-                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
-                                           "json")
-
-                # Calculate cost of the operation
-                cost = get_chat_model_cost(request_model,
-                                           pricing_info, input_tokens,
-                                           output_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                   cost)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                   output_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                   input_tokens + output_tokens)
-
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = create_metrics_attributes(
-                        service_name=application_name,
-                        deployment_environment=environment,
-                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                        system=SemanticConvetion.GEN_AI_SYSTEM_AI21,
-                        request_model=request_model,
-                        server_address=server_address,
-                        server_port=server_port,
-                        response_model=request_model,
-                    )
-
-                    metrics["genai_client_usage_tokens"].record(
-                        input_tokens + output_tokens, attributes
-                    )
-                    metrics["genai_client_operation_duration"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_server_ttft"].record(
-                        end_time - start_time, attributes
-                    )
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
-                    metrics["genai_cost"].record(cost, attributes)
-
-                # Return original response
-                return response
-
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
-
-                # Return original response
-                return response
+            response = process_chat_rag_response(
+                response=response_as_dict(response),
+                request_model=request_model,
+                pricing_info=pricing_info,
+                server_port=server_port,
+                server_address=server_address,
+                environment=environment,
+                application_name=application_name,
+                metrics=metrics,
+                event_provider=event_provider,
+                start_time=start_time,
+                span=span,
+                capture_message_content=capture_message_content,
+                disable_metrics=disable_metrics,
+                version=version,
+                **kwargs
+            )
+
+            return response
 
     return wrapper
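
Both factories return a wrapper(wrapped, instance, args, kwargs) callable in the shape wrapt expects, so openlit/instrumentation/ai21/__init__.py (file 5 above) can attach them with wrap_function_wrapper. A hypothetical wiring sketch; the configuration values and the target module and attribute paths are placeholders, not copied from the instrumentor:

    from opentelemetry import trace
    from wrapt import wrap_function_wrapper
    from openlit.instrumentation.ai21.async_ai21 import async_chat

    # Placeholder configuration; the real values come from openlit.init().
    wrapper = async_chat(
        version="1.33.11",
        environment="production",
        application_name="demo-app",
        tracer=trace.get_tracer(__name__),
        event_provider=None,   # event logger provider (see openlit/otel/events.py)
        pricing_info={},
        capture_message_content=True,
        metrics=None,
        disable_metrics=True,
    )

    # Placeholder target paths; AI21's real module layout may differ.
    wrap_function_wrapper(
        "ai21.clients.studio.resources.chat",
        "AsyncChatCompletions.create",
        wrapper,
    )

Note the factory signature change in this release: event_provider is inserted after tracer, and trace_content is renamed to capture_message_content.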