openlit 1.33.9__py3-none-any.whl → 1.33.10__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Files changed (77)
  1. openlit/__helpers.py +5 -0
  2. openlit/__init__.py +3 -2
  3. openlit/instrumentation/ag2/ag2.py +3 -3
  4. openlit/instrumentation/ai21/ai21.py +1 -1
  5. openlit/instrumentation/ai21/async_ai21.py +1 -1
  6. openlit/instrumentation/anthropic/anthropic.py +1 -1
  7. openlit/instrumentation/anthropic/async_anthropic.py +1 -1
  8. openlit/instrumentation/astra/astra.py +5 -5
  9. openlit/instrumentation/astra/async_astra.py +5 -5
  10. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +3 -3
  11. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +3 -3
  12. openlit/instrumentation/chroma/chroma.py +5 -5
  13. openlit/instrumentation/cohere/async_cohere.py +1 -1
  14. openlit/instrumentation/cohere/cohere.py +2 -2
  15. openlit/instrumentation/controlflow/controlflow.py +3 -3
  16. openlit/instrumentation/crawl4ai/async_crawl4ai.py +3 -3
  17. openlit/instrumentation/crawl4ai/crawl4ai.py +3 -3
  18. openlit/instrumentation/crewai/crewai.py +4 -2
  19. openlit/instrumentation/dynamiq/dynamiq.py +3 -3
  20. openlit/instrumentation/elevenlabs/async_elevenlabs.py +1 -2
  21. openlit/instrumentation/elevenlabs/elevenlabs.py +1 -2
  22. openlit/instrumentation/embedchain/embedchain.py +5 -5
  23. openlit/instrumentation/firecrawl/firecrawl.py +3 -3
  24. openlit/instrumentation/gpt4all/__init__.py +2 -2
  25. openlit/instrumentation/gpt4all/gpt4all.py +345 -220
  26. openlit/instrumentation/gpu/__init__.py +5 -5
  27. openlit/instrumentation/groq/__init__.py +2 -2
  28. openlit/instrumentation/groq/async_groq.py +356 -240
  29. openlit/instrumentation/groq/groq.py +356 -240
  30. openlit/instrumentation/haystack/haystack.py +3 -3
  31. openlit/instrumentation/julep/async_julep.py +3 -3
  32. openlit/instrumentation/julep/julep.py +3 -3
  33. openlit/instrumentation/langchain/__init__.py +13 -7
  34. openlit/instrumentation/langchain/async_langchain.py +384 -0
  35. openlit/instrumentation/langchain/langchain.py +98 -490
  36. openlit/instrumentation/letta/letta.py +5 -3
  37. openlit/instrumentation/litellm/__init__.py +4 -5
  38. openlit/instrumentation/litellm/async_litellm.py +316 -245
  39. openlit/instrumentation/litellm/litellm.py +312 -241
  40. openlit/instrumentation/llamaindex/llamaindex.py +3 -3
  41. openlit/instrumentation/mem0/mem0.py +3 -3
  42. openlit/instrumentation/milvus/milvus.py +5 -5
  43. openlit/instrumentation/mistral/__init__.py +6 -6
  44. openlit/instrumentation/mistral/async_mistral.py +421 -248
  45. openlit/instrumentation/mistral/mistral.py +418 -244
  46. openlit/instrumentation/multion/async_multion.py +4 -2
  47. openlit/instrumentation/multion/multion.py +4 -2
  48. openlit/instrumentation/ollama/__init__.py +8 -30
  49. openlit/instrumentation/ollama/async_ollama.py +385 -417
  50. openlit/instrumentation/ollama/ollama.py +384 -417
  51. openlit/instrumentation/openai/async_openai.py +7 -9
  52. openlit/instrumentation/openai/openai.py +7 -9
  53. openlit/instrumentation/phidata/phidata.py +4 -2
  54. openlit/instrumentation/pinecone/pinecone.py +5 -5
  55. openlit/instrumentation/premai/__init__.py +2 -2
  56. openlit/instrumentation/premai/premai.py +262 -213
  57. openlit/instrumentation/qdrant/async_qdrant.py +5 -5
  58. openlit/instrumentation/qdrant/qdrant.py +5 -5
  59. openlit/instrumentation/reka/__init__.py +2 -2
  60. openlit/instrumentation/reka/async_reka.py +90 -52
  61. openlit/instrumentation/reka/reka.py +90 -52
  62. openlit/instrumentation/together/__init__.py +4 -4
  63. openlit/instrumentation/together/async_together.py +278 -236
  64. openlit/instrumentation/together/together.py +278 -236
  65. openlit/instrumentation/transformers/__init__.py +1 -1
  66. openlit/instrumentation/transformers/transformers.py +75 -44
  67. openlit/instrumentation/vertexai/__init__.py +14 -64
  68. openlit/instrumentation/vertexai/async_vertexai.py +329 -986
  69. openlit/instrumentation/vertexai/vertexai.py +329 -986
  70. openlit/instrumentation/vllm/__init__.py +1 -1
  71. openlit/instrumentation/vllm/vllm.py +62 -32
  72. openlit/semcov/__init__.py +3 -3
  73. {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
  74. openlit-1.33.10.dist-info/RECORD +122 -0
  75. openlit-1.33.9.dist-info/RECORD +0 -121
  76. {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
  77. {openlit-1.33.9.dist-info → openlit-1.33.10.dist-info}/WHEEL +0 -0
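
Most of the churn below comes from moving span attributes onto the OpenTelemetry GenAI semantic conventions (SERVICE_NAME, DEPLOYMENT_ENVIRONMENT, gen_ai.* request/response attributes) and from adding streaming timing: spans now carry time-to-first-token (TTFT) and time-between-tokens (TBT). As a minimal sketch of what the calculate_ttft and calculate_tbt helpers newly imported from openlit.__helpers plausibly compute (the shipped implementations live in openlit/__helpers.py, +5 -0 above, and may differ in detail):

    # Hypothetical sketch, not the shipped openlit code.
    from typing import List

    def calculate_ttft(timestamps: List[float], start_time: float) -> float:
        # Time to first token: delay between request start and the first chunk.
        if timestamps:
            return timestamps[0] - start_time
        return 0.0

    def calculate_tbt(timestamps: List[float]) -> float:
        # Time between tokens: mean gap between consecutive chunk arrivals.
        if len(timestamps) > 1:
            gaps = [t2 - t1 for t1, t2 in zip(timestamps, timestamps[1:])]
            return sum(gaps) / len(gaps)
        return 0.0

The premai.py diff below shows both call sites: __iter__ appends a timestamp per chunk and computes TTFT once after the first chunk arrives, while the finally block computes TBT when more than one chunk was received.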
@@ -1,30 +1,32 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment, too-many-branches
 """
 Module for monitoring Prem AI API calls.
 """

 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
-    handle_exception,
-    general_tokens,
     get_chat_model_cost,
     get_embed_model_cost,
-    response_as_dict
+    general_tokens,
+    handle_exception,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
 )
 from openlit.semcov import SemanticConvetion

 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)

-def chat(gen_ai_endpoint, version, environment, application_name,
-         tracer, pricing_info, trace_content, metrics, disable_metrics):
+def chat(version, environment, application_name,
+         tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the PremAI API.
@@ -42,13 +44,22 @@ def chat(gen_ai_endpoint, version, environment, application_name,
         Wraps the response to collect message IDs and aggregated response.
         """

-        def __init__(self, wrapped, span, kwargs, **args):
+        def __init__(self, wrapped, span, kwargs, server_address, server_port,**args):
             self.__wrapped__ = wrapped
             self._span = span
             self._llmresponse = ""
             self._response_id = ""
             self._args = args
             self._kwargs = kwargs
+            self._server_address = server_address
+            self._server_port = server_port
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._response_model = ''
+            self._finish_reason = ''

         def __enter__(self):
             # Using context management protocols (if needed)
@@ -64,6 +75,14 @@ def chat(gen_ai_endpoint, version, environment, application_name,

         def __iter__(self):
             try:
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
                 for chunk in self.__wrapped__:
                     # Assuming `chunk` has similar structure as 'ChatCompletionResponseStream'
                     if chunk.choices:
@@ -72,7 +91,11 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                         if first_choice.delta.get('content'):
                             self._llmresponse += first_choice.delta.get('content')

-                        self._response_id = chunk.id
+                        if chunk.choices[0].finish_reason:
+                            self._finish_reason = chunk.choices[0].finish_reason
+                            self._response_id = chunk.id
+                            self._response_model = chunk.model
+
                     if not chunk:
                         # pylint: disable= stop-iteration-return
                         raise StopIteration
@@ -81,6 +104,10 @@ def chat(gen_ai_endpoint, version, environment, application_name,
             finally:
                 # Handling exception ensure observability without disrupting operation
                 try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                     # Format 'messages' into a single string
                     message_prompt = self._kwargs.get("messages", "")
                     formatted_messages = []
@@ -90,7 +117,6 @@ def chat(gen_ai_endpoint, version, environment, application_name,

                         if isinstance(content, list):
                             content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                 if "type" in item else f'text: {item["text"]}'
                                 for item in content
@@ -100,55 +126,79 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)

+                    request_model = self._kwargs.get("model", "gpt-4o-mini")
+
                     # Calculate tokens using input prompt and aggregated response
-                    prompt_tokens = general_tokens(prompt,)
-                    completion_tokens = general_tokens(self._llmresponse)
+                    input_tokens = general_tokens(prompt)
+                    output_tokens = general_tokens(self._llmresponse)

                     # Calculate cost of the operation
-                    cost = get_chat_model_cost(self._kwargs.get("model", "gpt-4o-mini"),
-                                        pricing_info, prompt_tokens,
-                                        completion_tokens)
-                    print(self._kwargs)
-                    # Set Span attributes
+                    cost = get_chat_model_cost(request_model,
+                                        pricing_info, input_tokens,
+                                        output_tokens)
+
+                    # Set Span attributes (OTel Semconv)
                     self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                        SemanticConvetion.GEN_AI_SYSTEM_PREMAI)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                         SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                        gen_ai_endpoint)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                        SemanticConvetion.GEN_AI_SYSTEM_PREMAI)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                        request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                        self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                        self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                        self._kwargs.get("frequency_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                        self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                        self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                        self._kwargs.get("stop", []))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                        self._kwargs.get("temperature", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                        self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                        [self._finish_reason])
                     self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                         self._response_id)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                        self._response_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                        input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                        output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                        self._server_address)
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                            "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                            "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                         environment)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    self._span.set_attribute(SERVICE_NAME,
                                         application_name)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                        self._kwargs.get("model", "gpt-4o-mini"))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                         self._kwargs.get("user", ""))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                        self._kwargs.get("top_p", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                        self._kwargs.get("max_tokens", -1))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                        self._kwargs.get("temperature", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                        self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                        self._kwargs.get("frequency_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                        self._kwargs.get("seed", ""))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                         True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                        prompt_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                        completion_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                        prompt_tokens + completion_tokens)
+                                        input_tokens + output_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                         cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                        self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                        self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                        version)
                     if trace_content:
                         self._span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -162,31 +212,35 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                             },
                         )
-
                     self._span.set_status(Status(StatusCode.OK))

                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_PREMAI,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_OPERATION:
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                self._kwargs.get("model", "gpt-3.5-turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_PREMAI,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=self._response_model,
+                        )

-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(
-                            prompt_tokens + completion_tokens, attributes
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
+                        )
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
                         )
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)

                 except Exception as e:
@@ -214,22 +268,25 @@ def chat(gen_ai_endpoint, version, environment, application_name,

         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "app.premai.io", 443)
+        request_model = kwargs.get("model", "gpt-4o-mini")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

         # pylint: disable=no-else-return
         if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
             awaited_wrapped = wrapped(*args, **kwargs)
-            span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)

-            return TracedSyncStream(awaited_wrapped, span, kwargs)
+            return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)

         # Handling for non-streaming responses
         else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                 response = wrapped(*args, **kwargs)
-
-                response_dict = response_as_dict(response)
+                end_time = time.time()

                 try:
                     # Format 'messages' into a single string
@@ -241,7 +298,6 @@ def chat(gen_ai_endpoint, version, environment, application_name,

                         if isinstance(content, list):
                             content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                 if "type" in item else f'text: {item["text"]}'
                                 for item in content
@@ -251,38 +307,62 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)

-                    # Set base span attribues
+                    input_tokens = response.usage.prompt_tokens
+                    output_tokens = response.usage.completion_tokens
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(request_model,
+                                        pricing_info, input_tokens,
+                                        output_tokens)
+
+                    # Set base span attribues (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                        SemanticConvetion.GEN_AI_SYSTEM_PREMAI)
                     span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                         SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                        gen_ai_endpoint)
-                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                        response_dict.additional_properties["id"])
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                        application_name)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                        SemanticConvetion.GEN_AI_SYSTEM_PREMAI)
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                        kwargs.get("model", "gpt-3.5-turbo"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                        kwargs.get("top_p", 1.0))
+                                        request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                        server_port)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                        kwargs.get("frequency_penalty", 0.0))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
                                         kwargs.get("max_tokens", -1))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
-                                        kwargs.get("user", ""))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                        kwargs.get("temperature", 1.0))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
                                         kwargs.get("presence_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                        kwargs.get("frequency_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                        kwargs.get("seed", ""))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                        kwargs.get("stop", []))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                        kwargs.get("temperature", 1.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                        kwargs.get("top_p", 1.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                        response.additional_properties.get('id'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                        response.model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                        input_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                        output_tokens)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                        server_address)
+
+                    # Set base span attribues (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                        environment)
+                    span.set_attribute(SERVICE_NAME,
+                                        application_name)
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                         False)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                        input_tokens + output_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                        cost)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                        end_time - start_time)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                        version)
                     if trace_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -290,94 +370,50 @@ def chat(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
                             },
                         )
-
-                    # Set span attributes when tools is not passed to the function call
-                    if "tools" not in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "gpt-4o-mini"),
-                                            pricing_info, response_dict.usage.prompt_tokens,
-                                            response_dict.usage.completion_tokens)
-
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                            response_dict.usage.prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                            response_dict.usage.completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                            response_dict.usage.total_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                            [response_dict.choices[0].finish_reason])
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                            cost)
-
-                        # Set span attributes for when n = 1 (default)
-                        if "n" not in kwargs or kwargs["n"] == 1:
-                            if trace_content:
-                                span.add_event(
-                                    name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.choices[0].message.content,
-                                    },
-                                )
-
-                        # Set span attributes for when n > 0
-                        else:
-                            i = 0
-                            while i < kwargs["n"] and trace_content is True:
-                                attribute_name = f"gen_ai.content.completion.{i}"
-                                span.add_event(
-                                    name=attribute_name,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.choices[i].message.content,
-                                    },
-                                )
-                                i += 1
-
-                        # Return original response
-                        return response
-
-                    # Set span attributes when tools is passed to the function call
-                    elif "tools" in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                            pricing_info, response_dict.usage.prompt_tokens,
-                                            response_dict.usage.completion_tokens)
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
                             attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response.choices[0].message.content),
                             },
                         )
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                            response_dict.usage.prompt_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
-                                            response_dict.usage.completion_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                            response_dict.usage.total_tokens)
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                            cost)
+
+                    if kwargs.get('tools'):
+                        span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                            str(response.choices[0].message.tool_calls))
+
+                    if kwargs.get('response_format', '') != '':
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                            "json")
+                    else:
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                            "text")

                     span.set_status(Status(StatusCode.OK))

                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_PREMAI,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_OPERATION:
-                                SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "gpt-3.5-turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_PREMAI,
+                            request_model=request_model,
+                            server_address=server_address,
+                            server_port=server_port,
+                            response_model=response.model,
+                        )

+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            end_time - start_time, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            end_time - start_time, attributes
+                        )
                         metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(response_dict.usage.total_tokens, attributes)
-                        metrics["genai_completion_tokens"].add(response_dict.usage.completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(response_dict.usage.prompt_tokens, attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)

                     # Return original response
@@ -392,18 +428,17 @@ def chat(gen_ai_endpoint, version, environment, application_name,

     return wrapper

-def embedding(gen_ai_endpoint, version, environment, application_name,
+def embedding(version, environment, application_name,
               tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for embeddings to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the Prem AI API.
+        application_name: Name of the application using the PremAI API.
         tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of Prem AI usage.
+        pricing_info: Information used for calculating the cost of PremAI usage.
         trace_content: Flag indicating whether to trace the actual content.

     Returns:
@@ -427,71 +462,85 @@ def embedding(gen_ai_endpoint, version, environment, application_name,
             The response from the original 'embeddings' method.
         """

-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "app.premai.io", 443)
+        request_model = kwargs.get("model", "text-embedding-ada-002")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
-            response_dict = response_as_dict(response)
+            end_time = time.time()
+
            try:
+                input_tokens = response.usage.prompt_tokens
+
                 # Calculate cost of the operation
-                cost = get_embed_model_cost(kwargs.get("model", "text-embedding-ada-002"),
-                                    pricing_info, response_dict.usage.prompt_tokens)
+                cost = get_embed_model_cost(request_model,
+                                    pricing_info, input_tokens)

-                # Set Span attributes
+                # Set Span attributes (OTel Semconv)
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                    SemanticConvetion.GEN_AI_SYSTEM_PREMAI)
                 span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
                                     SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                    gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                    application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                    SemanticConvetion.GEN_AI_SYSTEM_PREMAI)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                    kwargs.get("model", "text-embedding-3-large"))
+                                    request_model)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
-                                    kwargs.get("encoding_format", "float"))
-                # span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
-                #                    kwargs.get("dimensions", "null"))
+                                    [kwargs.get('encoding_format', 'float')])
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                    response.model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                    server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                    server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                    input_tokens)
+
+                # Set Span attributes (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
+                                    environment)
+                span.set_attribute(SERVICE_NAME,
+                                    application_name)
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                     kwargs.get("user", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
-                                    response_dict.usage.prompt_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                    response_dict.usage.total_tokens)
+                                    input_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                    version)
+
                 if trace_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
                         attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("input", ""),
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: str(kwargs.get("input", "")),
                         },
                     )

                 span.set_status(Status(StatusCode.OK))

                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_PREMAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_OPERATION:
-                            SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "text-embedding-ada-002")
-                    }
-
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_PREMAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=response.model,
+                    )
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(
-                        response_dict.usage.total_tokens, attributes)
-                    metrics["genai_prompt_tokens"].add(
-                        response_dict.usageprompt_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                     metrics["genai_cost"].record(cost, attributes)

                 # Return original response