openlit 1.33.9__py3-none-any.whl → 1.33.11__py3-none-any.whl

This diff compares the published contents of two publicly available versions of the package, as released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their public registry.
Files changed (113)
  1. openlit/__helpers.py +78 -0
  2. openlit/__init__.py +41 -13
  3. openlit/instrumentation/ag2/__init__.py +9 -10
  4. openlit/instrumentation/ag2/ag2.py +134 -69
  5. openlit/instrumentation/ai21/__init__.py +6 -5
  6. openlit/instrumentation/ai21/ai21.py +71 -534
  7. openlit/instrumentation/ai21/async_ai21.py +71 -534
  8. openlit/instrumentation/ai21/utils.py +407 -0
  9. openlit/instrumentation/anthropic/__init__.py +3 -3
  10. openlit/instrumentation/anthropic/anthropic.py +5 -5
  11. openlit/instrumentation/anthropic/async_anthropic.py +5 -5
  12. openlit/instrumentation/assemblyai/__init__.py +2 -2
  13. openlit/instrumentation/assemblyai/assemblyai.py +3 -3
  14. openlit/instrumentation/astra/__init__.py +25 -25
  15. openlit/instrumentation/astra/astra.py +7 -7
  16. openlit/instrumentation/astra/async_astra.py +7 -7
  17. openlit/instrumentation/azure_ai_inference/__init__.py +5 -5
  18. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +11 -11
  19. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +11 -11
  20. openlit/instrumentation/bedrock/__init__.py +2 -2
  21. openlit/instrumentation/bedrock/bedrock.py +3 -3
  22. openlit/instrumentation/chroma/__init__.py +9 -9
  23. openlit/instrumentation/chroma/chroma.py +7 -7
  24. openlit/instrumentation/cohere/__init__.py +7 -7
  25. openlit/instrumentation/cohere/async_cohere.py +10 -10
  26. openlit/instrumentation/cohere/cohere.py +11 -11
  27. openlit/instrumentation/controlflow/__init__.py +4 -4
  28. openlit/instrumentation/controlflow/controlflow.py +5 -5
  29. openlit/instrumentation/crawl4ai/__init__.py +3 -3
  30. openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
  31. openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
  32. openlit/instrumentation/crewai/__init__.py +3 -3
  33. openlit/instrumentation/crewai/crewai.py +6 -4
  34. openlit/instrumentation/dynamiq/__init__.py +5 -5
  35. openlit/instrumentation/dynamiq/dynamiq.py +5 -5
  36. openlit/instrumentation/elevenlabs/__init__.py +5 -5
  37. openlit/instrumentation/elevenlabs/async_elevenlabs.py +4 -5
  38. openlit/instrumentation/elevenlabs/elevenlabs.py +4 -5
  39. openlit/instrumentation/embedchain/__init__.py +2 -2
  40. openlit/instrumentation/embedchain/embedchain.py +9 -9
  41. openlit/instrumentation/firecrawl/__init__.py +3 -3
  42. openlit/instrumentation/firecrawl/firecrawl.py +5 -5
  43. openlit/instrumentation/google_ai_studio/__init__.py +3 -3
  44. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +3 -3
  45. openlit/instrumentation/google_ai_studio/google_ai_studio.py +3 -3
  46. openlit/instrumentation/gpt4all/__init__.py +5 -5
  47. openlit/instrumentation/gpt4all/gpt4all.py +350 -225
  48. openlit/instrumentation/gpu/__init__.py +5 -5
  49. openlit/instrumentation/groq/__init__.py +5 -5
  50. openlit/instrumentation/groq/async_groq.py +359 -243
  51. openlit/instrumentation/groq/groq.py +359 -243
  52. openlit/instrumentation/haystack/__init__.py +2 -2
  53. openlit/instrumentation/haystack/haystack.py +5 -5
  54. openlit/instrumentation/julep/__init__.py +7 -7
  55. openlit/instrumentation/julep/async_julep.py +6 -6
  56. openlit/instrumentation/julep/julep.py +6 -6
  57. openlit/instrumentation/langchain/__init__.py +15 -9
  58. openlit/instrumentation/langchain/async_langchain.py +388 -0
  59. openlit/instrumentation/langchain/langchain.py +110 -497
  60. openlit/instrumentation/letta/__init__.py +7 -7
  61. openlit/instrumentation/letta/letta.py +10 -8
  62. openlit/instrumentation/litellm/__init__.py +9 -10
  63. openlit/instrumentation/litellm/async_litellm.py +321 -250
  64. openlit/instrumentation/litellm/litellm.py +319 -248
  65. openlit/instrumentation/llamaindex/__init__.py +2 -2
  66. openlit/instrumentation/llamaindex/llamaindex.py +5 -5
  67. openlit/instrumentation/mem0/__init__.py +2 -2
  68. openlit/instrumentation/mem0/mem0.py +5 -5
  69. openlit/instrumentation/milvus/__init__.py +2 -2
  70. openlit/instrumentation/milvus/milvus.py +7 -7
  71. openlit/instrumentation/mistral/__init__.py +13 -13
  72. openlit/instrumentation/mistral/async_mistral.py +426 -253
  73. openlit/instrumentation/mistral/mistral.py +424 -250
  74. openlit/instrumentation/multion/__init__.py +7 -7
  75. openlit/instrumentation/multion/async_multion.py +9 -7
  76. openlit/instrumentation/multion/multion.py +9 -7
  77. openlit/instrumentation/ollama/__init__.py +19 -39
  78. openlit/instrumentation/ollama/async_ollama.py +137 -563
  79. openlit/instrumentation/ollama/ollama.py +136 -563
  80. openlit/instrumentation/ollama/utils.py +333 -0
  81. openlit/instrumentation/openai/__init__.py +11 -11
  82. openlit/instrumentation/openai/async_openai.py +25 -27
  83. openlit/instrumentation/openai/openai.py +25 -27
  84. openlit/instrumentation/phidata/__init__.py +2 -2
  85. openlit/instrumentation/phidata/phidata.py +6 -4
  86. openlit/instrumentation/pinecone/__init__.py +6 -6
  87. openlit/instrumentation/pinecone/pinecone.py +7 -7
  88. openlit/instrumentation/premai/__init__.py +5 -5
  89. openlit/instrumentation/premai/premai.py +268 -219
  90. openlit/instrumentation/qdrant/__init__.py +2 -2
  91. openlit/instrumentation/qdrant/async_qdrant.py +7 -7
  92. openlit/instrumentation/qdrant/qdrant.py +7 -7
  93. openlit/instrumentation/reka/__init__.py +5 -5
  94. openlit/instrumentation/reka/async_reka.py +93 -55
  95. openlit/instrumentation/reka/reka.py +93 -55
  96. openlit/instrumentation/together/__init__.py +9 -9
  97. openlit/instrumentation/together/async_together.py +284 -242
  98. openlit/instrumentation/together/together.py +284 -242
  99. openlit/instrumentation/transformers/__init__.py +3 -3
  100. openlit/instrumentation/transformers/transformers.py +79 -48
  101. openlit/instrumentation/vertexai/__init__.py +19 -69
  102. openlit/instrumentation/vertexai/async_vertexai.py +333 -990
  103. openlit/instrumentation/vertexai/vertexai.py +333 -990
  104. openlit/instrumentation/vllm/__init__.py +3 -3
  105. openlit/instrumentation/vllm/vllm.py +65 -35
  106. openlit/otel/events.py +85 -0
  107. openlit/otel/tracing.py +3 -13
  108. openlit/semcov/__init__.py +16 -4
  109. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/METADATA +2 -2
  110. openlit-1.33.11.dist-info/RECORD +125 -0
  111. openlit-1.33.9.dist-info/RECORD +0 -121
  112. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/LICENSE +0 -0
  113. {openlit-1.33.9.dist-info → openlit-1.33.11.dist-info}/WHEEL +0 -0
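The hunks below are from the LiteLLM instrumentation (the async `acompletion`/`aembedding` wrappers) and are representative of the changes repeated across the instrumentation modules in this release: the `gen_ai_endpoint` parameter disappears in favor of low-cardinality `'{operation} {model}'` span names, `trace_content` is renamed to `capture_message_content`, inline metric-attribute dicts are replaced by a shared `create_metrics_attributes` helper, and streaming wrappers now record time-to-first-token (TTFT) and time-between-tokens (TBT). Judging only by how the new helpers are called in the diff, they plausibly reduce a list of per-chunk timestamps along these lines (a sketch; the real implementations live in the expanded openlit/__helpers.py and may differ):

def calculate_ttft(timestamps, start_time):
    # Time to first token: delay between request start and the first chunk.
    if timestamps:
        return timestamps[0] - start_time
    return 0

def calculate_tbt(timestamps):
    # Mean gap between consecutive chunk arrivals.
    if len(timestamps) < 2:
        return 0
    gaps = [t2 - t1 for t1, t2 in zip(timestamps, timestamps[1:])]
    return sum(gaps) / len(gaps)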
@@ -1,36 +1,38 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches
 """
 Module for monitoring LiteLLM calls.
 """

 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
     get_chat_model_cost,
     get_embed_model_cost,
-    openai_tokens,
+    general_tokens,
     handle_exception,
     response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
 )
 from openlit.semcov import SemanticConvetion

 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)

-def acompletion(gen_ai_endpoint, version, environment, application_name,
-                tracer, pricing_info, trace_content, metrics, disable_metrics):
+def acompletion(version, environment, application_name,
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the LiteLLM SDK.
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of LiteLLM usage.
-        trace_content: Flag indicating whether to trace the actual content.
+        capture_message_content: Flag indicating whether to trace the actual content.

     Returns:
         A function that wraps the chat completions method to add telemetry.
@@ -51,16 +53,27 @@ def acompletion(gen_ai_endpoint, version, environment, application_name,
                 wrapped,
                 span,
                 kwargs,
+                server_address,
+                server_port,
                 **args,
             ):
             self.__wrapped__ = wrapped
             self._span = span
-            # Placeholder for aggregating streaming response
-            self._llmresponse = ""
-            self._response_id = ""
+            self._llmresponse = ''
+            self._response_id = ''
+            self._response_model = ''
+            self._finish_reason = ''
+            self._response_service_tier = ''

             self._args = args
             self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port

         async def __aenter__(self):
             await self.__wrapped__.__aenter__()
@@ -79,6 +92,14 @@ def acompletion(gen_ai_endpoint, version, environment, application_name,
         async def __anext__(self):
             try:
                 chunk = await self.__wrapped__.__anext__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
                 chunked = response_as_dict(chunk)
                 # Collect message IDs and aggregated response from events
                 if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
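The hunk above adds the timing hook in one place, `__anext__`, so it composes with any async iterator without touching the rest of the wrapper. The same pattern in isolation (hypothetical class name, tracing omitted):

import time

class TimedAsyncStream:
    # Records one timestamp per chunk; TTFT falls out of the first entry.
    def __init__(self, wrapped):
        self.__wrapped__ = wrapped
        self._start_time = time.time()
        self._timestamps = []
        self._ttft = 0

    def __aiter__(self):
        return self

    async def __anext__(self):
        chunk = await self.__wrapped__.__anext__()
        self._timestamps.append(time.time())
        if len(self._timestamps) == 1:
            self._ttft = self._timestamps[0] - self._start_time
        return chunk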
@@ -88,81 +109,115 @@ def acompletion(gen_ai_endpoint, version, environment, application_name,
                     if content:
                         self._llmresponse += content
                 self._response_id = chunked.get('id')
+                self._response_model = chunked.get('model')
+                self._finish_reason = chunked.get('choices')[0].get('finish_reason')
+                self._response_service_tier = str(chunked.get('system_fingerprint'))
                 return chunk
             except StopAsyncIteration:
                 # Handling exception ensure observability without disrupting operation
                 try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                     # Format 'messages' into a single string
-                    message_prompt = self._kwargs.get("messages", "")
+                    message_prompt = self._kwargs.get('messages', '')
                     formatted_messages = []
                     for message in message_prompt:
-                        role = message["role"]
-                        content = message["content"]
+                        role = message['role']
+                        content = message['content']

                         if isinstance(content, list):
                             content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                 if "type" in item else f'text: {item["text"]}'
                                 for item in content
                             )
-                            formatted_messages.append(f"{role}: {content_str}")
+                            formatted_messages.append(f'{role}: {content_str}')
                         else:
-                            formatted_messages.append(f"{role}: {content}")
-                    prompt = "\n".join(formatted_messages)
+                            formatted_messages.append(f'{role}: {content}')
+                    prompt = '\n'.join(formatted_messages)
+
+                    request_model = self._kwargs.get('model', 'openai/gpt-4o')

                     # Calculate tokens using input prompt and aggregated response
-                    prompt_tokens = openai_tokens(prompt,
-                                                  self._kwargs.get("model", "gpt-3.5-turbo"))
-                    completion_tokens = openai_tokens(self._llmresponse,
-                                                      self._kwargs.get("model", "gpt-3.5-turbo"))
+                    input_tokens = general_tokens(prompt)
+                    output_tokens = general_tokens(self._llmresponse)

                     # Calculate cost of the operation
-                    cost = get_chat_model_cost(self._kwargs.get("model", "gpt-3.5-turbo"),
-                                               pricing_info, prompt_tokens,
-                                               completion_tokens)
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, input_tokens,
+                                               output_tokens)

-                    # Set Span attributes
-                    self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                             SemanticConvetion.GEN_AI_SYSTEM_LITELLM)
+                    # Set Span attributes (OTel Semconv)
+                    self._span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
                     self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                              SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                             gen_ai_endpoint)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                             SemanticConvetion.GEN_AI_SYSTEM_LITELLM)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                             request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                             self._kwargs.get('seed', ''))
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                             self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                             self._kwargs.get('frequency_penalty', 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                             self._kwargs.get('max_tokens', -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                             self._kwargs.get('presence_penalty', 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                             self._kwargs.get('stop', []))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                             self._kwargs.get('temperature', 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                             self._kwargs.get('top_p', 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                             [self._finish_reason])
                     self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                              self._response_id)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                             self._response_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                             input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                             output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                             self._server_address)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SERVICE_TIER,
+                                             self._kwargs.get('service_tier', 'auto'))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SERVICE_TIER,
+                                             self._response_service_tier)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
+                                             self._response_service_tier)
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 'text')
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 'json')
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                              environment)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    self._span.set_attribute(SERVICE_NAME,
                                              application_name)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                             self._kwargs.get("model", "gpt-3.5-turbo"))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
-                                             self._kwargs.get("user", ""))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                             self._kwargs.get("top_p", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                             self._kwargs.get("max_tokens", -1))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                             self._kwargs.get("temperature", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                             self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                             self._kwargs.get("frequency_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                             self._kwargs.get("seed", ""))
+                                             self._kwargs.get('user', ''))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                              True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                             prompt_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                             completion_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                             prompt_tokens + completion_tokens)
+                                             input_tokens + output_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                              cost)
-                    if trace_content:
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                             self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                             self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                             version)
+                    if capture_message_content:
                         self._span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                             attributes={
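Note the token-accounting change in this hunk: `openai_tokens(prompt, model)`, which needed a resolvable OpenAI model name for its tokenizer lookup, is replaced by `general_tokens(prompt)`, which takes no model at all — a sensible move for LiteLLM, where the `model` string can name any provider (the new default `'openai/gpt-4o'` is a LiteLLM route, not a tokenizer name). One plausible shape for such a counter, assuming a fixed tiktoken encoding (the actual helper in openlit/__helpers.py may differ):

import tiktoken

def general_tokens(text):
    # Provider-agnostic approximation: one fixed encoding for every model,
    # trading per-model accuracy for uniformity across backends.
    return len(tiktoken.get_encoding('cl100k_base').encode(text))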
@@ -175,36 +230,40 @@ def acompletion(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                             },
                         )
-
                     self._span.set_status(Status(StatusCode.OK))

                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_LITELLM,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_OPERATION:
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                self._kwargs.get("model", "gpt-3.5-turbo")
-                        }
-
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(
-                            prompt_tokens + completion_tokens, attributes
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_LITELLM,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=self._response_model,
                         )
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
-                        metrics["genai_cost"].record(cost, attributes)
+
+                        metrics['genai_client_usage_tokens'].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics['genai_client_operation_duration'].record(
+                            self._end_time - self._start_time, attributes
+                        )
+                        metrics['genai_server_tbt'].record(
+                            self._tbt, attributes
+                        )
+                        metrics['genai_server_ttft'].record(
+                            self._ttft, attributes
+                        )
+                        metrics['genai_requests'].add(1, attributes)
+                        metrics['genai_completion_tokens'].add(output_tokens, attributes)
+                        metrics['genai_prompt_tokens'].add(input_tokens, attributes)
+                        metrics['genai_cost'].record(cost, attributes)

                 except Exception as e:
                     handle_exception(self._span, e)
-                    logger.error("Error in trace creation: %s", e)
+                    logger.error('Error in trace creation: %s', e)
                 finally:
                     self._span.end()
                 raise
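With this hunk, the streaming path stops building its metric attributes inline and calls `create_metrics_attributes`, the same helper the non-streaming and embedding paths use below. From the keyword arguments at the call sites, it presumably returns a shared low-cardinality attribute dict along these lines (a sketch, not the shipped implementation):

from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
from openlit.semcov import SemanticConvetion

def create_metrics_attributes(service_name, deployment_environment, operation,
                              system, request_model, server_address, server_port,
                              response_model):
    # One attribute set recorded on every metric point, mirroring the span
    # attributes but restricted to low-cardinality keys.
    return {
        TELEMETRY_SDK_NAME: 'openlit',
        SERVICE_NAME: service_name,
        DEPLOYMENT_ENVIRONMENT: deployment_environment,
        SemanticConvetion.GEN_AI_OPERATION: operation,
        SemanticConvetion.GEN_AI_SYSTEM: system,
        SemanticConvetion.GEN_AI_REQUEST_MODEL: request_model,
        SemanticConvetion.SERVER_ADDRESS: server_address,
        SemanticConvetion.SERVER_PORT: server_port,
        SemanticConvetion.GEN_AI_RESPONSE_MODEL: response_model,
    }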
@@ -227,77 +286,114 @@ def acompletion(gen_ai_endpoint, version, environment, application_name,
         """

         # Check if streaming is enabled for the API call
-        streaming = kwargs.get("stream", False)
+        streaming = kwargs.get('stream', False)
+        server_address, server_port = 'NOT_FOUND', 'NOT_FOUND'
+        request_model = kwargs.get('model', 'openai/gpt-4o')
+
+        span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}'

         # pylint: disable=no-else-return
         if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
             awaited_wrapped = await wrapped(*args, **kwargs)
-            span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)

-            return TracedAsyncStream(awaited_wrapped, span, kwargs)
+            return TracedAsyncStream(awaited_wrapped, span, kwargs, server_address, server_port)

+        # Handling for non-streaming responses
         # Handling for non-streaming responses
         else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                 response = await wrapped(*args, **kwargs)
+                end_time = time.time()

                 response_dict = response_as_dict(response)

                 try:
                     # Format 'messages' into a single string
-                    message_prompt = kwargs.get("messages", "")
+                    message_prompt = kwargs.get('messages', '')
                     formatted_messages = []
                     for message in message_prompt:
-                        role = message["role"]
-                        content = message["content"]
+                        role = message['role']
+                        content = message['content']

                         if isinstance(content, list):
                             content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                 if "type" in item else f'text: {item["text"]}'
                                 for item in content
                             )
-                            formatted_messages.append(f"{role}: {content_str}")
+                            formatted_messages.append(f'{role}: {content_str}')
                         else:
-                            formatted_messages.append(f"{role}: {content}")
-                    prompt = "\n".join(formatted_messages)
+                            formatted_messages.append(f'{role}: {content}')
+                    prompt = '\n'.join(formatted_messages)

-                    # Set base span attribues
-                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                       SemanticConvetion.GEN_AI_SYSTEM_LITELLM)
+                    input_tokens = response_dict.get('usage').get('prompt_tokens')
+                    output_tokens = response_dict.get('usage').get('completion_tokens')
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, input_tokens,
+                                               output_tokens)
+
+                    # Set base span attribues (OTel Semconv)
+                    span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
                     span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                        SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                       SemanticConvetion.GEN_AI_SYSTEM_LITELLM)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                       kwargs.get('seed', ''))
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                       kwargs.get('frequency_penalty', 0.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                       kwargs.get('max_tokens', -1))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                       kwargs.get('presence_penalty', 0.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                       kwargs.get('stop', []))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                       kwargs.get('temperature', 1.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                       kwargs.get('top_p', 1.0))
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                       response_dict.get("id"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                       response_dict.get('id'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       response_dict.get('model'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                       input_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                       output_tokens)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SERVICE_TIER,
+                                       kwargs.get('service_tier', 'auto'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
+                                       str(response_dict.get('system_fingerprint')))
+
+                    # Set base span attribues (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    span.set_attribute(SERVICE_NAME,
                                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "gpt-3.5-turbo"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                       kwargs.get("top_p", 1.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                       kwargs.get("max_tokens", -1))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
-                                       kwargs.get("user", ""))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                       kwargs.get("temperature", 1.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                       kwargs.get("presence_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                       kwargs.get("frequency_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                       kwargs.get("seed", ""))
+                                       kwargs.get('user', ''))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                        False)
-                    if trace_content:
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                       input_tokens + output_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                       cost)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                       end_time - start_time)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
+                    if capture_message_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                             attributes={
@@ -305,121 +401,81 @@ def acompletion(gen_ai_endpoint, version, environment, application_name,
                             },
                         )

-                    # Set span attributes when tools is not passed to the function call
-                    if "tools" not in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                                   pricing_info, response_dict.get('usage', {}).get('prompt_tokens', None),
-                                                   response_dict.get('usage', {}).get('completion_tokens', None))
-
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                           response_dict.get('usage', {}).get('prompt_tokens', None))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                           response_dict.get('usage', {}).get('completion_tokens', None))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                           response_dict.get('usage', {}).get('total_tokens', None))
+                    for i in range(kwargs.get('n',1)):
                         span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                           [response_dict.get('choices', [])[0].get('finish_reason', None)])
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                           cost)
-
-                        # Set span attributes for when n = 1 (default)
-                        if "n" not in kwargs or kwargs["n"] == 1:
-                            if trace_content:
-                                span.add_event(
-                                    name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices', [])[0].get("message").get("content"),
-                                    },
-                                )
-
-                        # Set span attributes for when n > 0
-                        else:
-                            i = 0
-                            while i < kwargs["n"] and trace_content is True:
-                                attribute_name = f"gen_ai.content.completion.{i}"
-                                span.add_event(
-                                    name=attribute_name,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                                    },
-                                )
-                                i += 1
-
-                        # Return original response
-                        return response
-
-                    # Set span attributes when tools is passed to the function call
-                    elif "tools" in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                                   pricing_info, response_dict.get('usage').get('prompt_tokens'),
-                                                   response_dict.get('usage').get('completion_tokens'))
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
-                            },
-                        )
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                           response_dict.get('usage').get('prompt_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                           response_dict.get('usage').get('completion_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                           response_dict.get('usage').get('total_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                           cost)
+                                           [response_dict.get('choices')[i].get('finish_reason')])
+                        if capture_message_content:
+                            span.add_event(
+                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                                attributes={
+                                    # pylint: disable=line-too-long
+                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+                                },
+                            )
+                        if kwargs.get('tools'):
+                            span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                               str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+                        if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                               'text')
+                        elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                               'json')

                     span.set_status(Status(StatusCode.OK))

                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_LITELLM,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_OPERATION:
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "gpt-3.5-turbo")
-                        }
-
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(response_dict.get('usage').get('total_tokens'), attributes)
-                        metrics["genai_completion_tokens"].add(response_dict.get('usage').get('completion_tokens'), attributes)
-                        metrics["genai_prompt_tokens"].add(response_dict.get('usage').get('prompt_tokens'), attributes)
-                        metrics["genai_cost"].record(cost, attributes)
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_LITELLM,
+                            request_model=request_model,
+                            server_address=server_address,
+                            server_port=server_port,
+                            response_model=response_dict.get('model'),
+                        )
+
+                        metrics['genai_client_usage_tokens'].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics['genai_client_operation_duration'].record(
+                            end_time - start_time, attributes
+                        )
+                        metrics['genai_server_ttft'].record(
+                            end_time - start_time, attributes
+                        )
+                        metrics['genai_requests'].add(1, attributes)
+                        metrics['genai_completion_tokens'].add(output_tokens, attributes)
+                        metrics['genai_prompt_tokens'].add(input_tokens, attributes)
+                        metrics['genai_cost'].record(cost, attributes)

                     # Return original response
                     return response

                 except Exception as e:
                     handle_exception(span, e)
-                    logger.error("Error in trace creation: %s", e)
+                    logger.error('Error in trace creation: %s', e)

                     # Return original response
                     return response

     return wrapper

-def aembedding(gen_ai_endpoint, version, environment, application_name,
-               tracer, pricing_info, trace_content, metrics, disable_metrics):
+def aembedding(version, environment, application_name,
+               tracer, pricing_info, capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for embeddings to collect metrics.
-
+
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the OpenAI API.
+        application_name: Name of the application using the LiteLLM API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of OpenAI usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
+        pricing_info: Information used for calculating the cost of LiteLLM usage.
+        capture_message_content: Flag indicating whether to trace the actual content.
+

     Returns:
         A function that wraps the embeddings method to add telemetry.
@@ -441,79 +497,94 @@ def aembedding(gen_ai_endpoint, version, environment, application_name,
             The response from the original 'embeddings' method.
         """

-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = 'NOT_FOUND', 'NOT_FOUND'
+        request_model = kwargs.get('model', 'text-embedding-ada-002')
+
+        span_name = f'{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}'
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = await wrapped(*args, **kwargs)
+            end_time = time.time()
+
            response_dict = response_as_dict(response)
            try:
+                input_tokens = response_dict.get('usage').get('prompt_tokens')
+
                # Calculate cost of the operation
-                cost = get_embed_model_cost(kwargs.get("model", "text-embedding-ada-002"),
-                                            pricing_info, response_dict.get('usage').get('prompt_tokens'))
+                cost = get_embed_model_cost(request_model,
+                                            pricing_info, input_tokens)

-                # Set Span attributes
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                   SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
+                # Set Span attributes (OTel Semconv)
+                span.set_attribute(TELEMETRY_SDK_NAME, 'openlit')
                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                   environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                   application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                   SemanticConvetion.GEN_AI_SYSTEM_LITELLM)
                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "text-embedding-ada-002"))
+                                   request_model)
                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
-                                   kwargs.get("encoding_format", "float"))
-                # span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
-                #                    kwargs.get("dimensions", "null"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
-                                   kwargs.get("user", ""))
+                                   [kwargs.get('encoding_format', 'float')])
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   response_dict.get('model'))
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                   response_dict.get('usage').get('prompt_tokens'))
+                                   input_tokens)
+
+                # Set Span attributes (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                   environment)
+                span.set_attribute(SERVICE_NAME,
+                                   application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                   kwargs.get('user', ''))
                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                   response_dict.get('usage').get('total_tokens'))
+                                   input_tokens)
                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                   cost)
-                if trace_content:
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                   version)
+
+                if capture_message_content:
                    span.add_event(
                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("input", ""),
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: str(kwargs.get('input', '')),
                        },
                    )

                span.set_status(Status(StatusCode.OK))

                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_OPERATION:
-                            SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "text-embedding-ada-002")
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(
-                        response_dict.get('usage').get('total_tokens'), attributes)
-                    metrics["genai_prompt_tokens"].add(
-                        response_dict.get('usage').get('prompt_tokens'), attributes)
-                    metrics["genai_cost"].record(cost, attributes)
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_LITELLM,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=response_dict.get('model'),
+                    )
+                    metrics['genai_client_usage_tokens'].record(
+                        input_tokens, attributes
+                    )
+                    metrics['genai_client_operation_duration'].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics['genai_requests'].add(1, attributes)
+                    metrics['genai_prompt_tokens'].add(input_tokens, attributes)
+                    metrics['genai_cost'].record(cost, attributes)

                # Return original response
                return response

            except Exception as e:
                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
+                logger.error('Error in trace creation: %s', e)

                # Return original response
                return response
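Taken together, once `openlit.init()` has run, these wrappers attach a `'chat {model}'` or `'embedding {model}'` span plus the new client/server metrics to every LiteLLM call. A minimal usage sketch — the `capture_message_content` flag follows the parameter rename seen throughout this diff, though the exact `init` signature may vary by version:

import asyncio
import litellm
import openlit

# Assumed init arguments, mirroring the renamed flag in this release.
openlit.init(application_name='demo-app', capture_message_content=True)

async def main():
    response = await litellm.acompletion(
        model='openai/gpt-4o',
        messages=[{'role': 'user', 'content': 'Say hi'}],
    )
    print(response.choices[0].message.content)

asyncio.run(main())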