openlit 1.33.8__py3-none-any.whl → 1.33.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. openlit/__helpers.py +88 -0
  2. openlit/__init__.py +4 -3
  3. openlit/instrumentation/ag2/ag2.py +5 -5
  4. openlit/instrumentation/ai21/__init__.py +4 -4
  5. openlit/instrumentation/ai21/ai21.py +370 -319
  6. openlit/instrumentation/ai21/async_ai21.py +371 -319
  7. openlit/instrumentation/anthropic/__init__.py +4 -4
  8. openlit/instrumentation/anthropic/anthropic.py +321 -189
  9. openlit/instrumentation/anthropic/async_anthropic.py +323 -190
  10. openlit/instrumentation/assemblyai/__init__.py +1 -1
  11. openlit/instrumentation/assemblyai/assemblyai.py +59 -43
  12. openlit/instrumentation/astra/astra.py +9 -9
  13. openlit/instrumentation/astra/async_astra.py +9 -9
  14. openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
  15. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
  16. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
  17. openlit/instrumentation/bedrock/__init__.py +1 -1
  18. openlit/instrumentation/bedrock/bedrock.py +115 -58
  19. openlit/instrumentation/chroma/chroma.py +9 -9
  20. openlit/instrumentation/cohere/__init__.py +33 -10
  21. openlit/instrumentation/cohere/async_cohere.py +610 -0
  22. openlit/instrumentation/cohere/cohere.py +410 -219
  23. openlit/instrumentation/controlflow/controlflow.py +5 -5
  24. openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
  25. openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
  26. openlit/instrumentation/crewai/crewai.py +6 -4
  27. openlit/instrumentation/dynamiq/dynamiq.py +5 -5
  28. openlit/instrumentation/elevenlabs/async_elevenlabs.py +71 -46
  29. openlit/instrumentation/elevenlabs/elevenlabs.py +71 -51
  30. openlit/instrumentation/embedchain/embedchain.py +9 -9
  31. openlit/instrumentation/firecrawl/firecrawl.py +5 -5
  32. openlit/instrumentation/google_ai_studio/__init__.py +9 -9
  33. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
  34. openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
  35. openlit/instrumentation/gpt4all/__init__.py +2 -2
  36. openlit/instrumentation/gpt4all/gpt4all.py +345 -220
  37. openlit/instrumentation/gpu/__init__.py +5 -5
  38. openlit/instrumentation/groq/__init__.py +2 -2
  39. openlit/instrumentation/groq/async_groq.py +356 -240
  40. openlit/instrumentation/groq/groq.py +356 -240
  41. openlit/instrumentation/haystack/haystack.py +5 -5
  42. openlit/instrumentation/julep/async_julep.py +5 -5
  43. openlit/instrumentation/julep/julep.py +5 -5
  44. openlit/instrumentation/langchain/__init__.py +13 -7
  45. openlit/instrumentation/langchain/async_langchain.py +384 -0
  46. openlit/instrumentation/langchain/langchain.py +105 -492
  47. openlit/instrumentation/letta/letta.py +11 -9
  48. openlit/instrumentation/litellm/__init__.py +4 -5
  49. openlit/instrumentation/litellm/async_litellm.py +318 -247
  50. openlit/instrumentation/litellm/litellm.py +314 -243
  51. openlit/instrumentation/llamaindex/llamaindex.py +5 -5
  52. openlit/instrumentation/mem0/mem0.py +5 -5
  53. openlit/instrumentation/milvus/milvus.py +9 -9
  54. openlit/instrumentation/mistral/__init__.py +6 -6
  55. openlit/instrumentation/mistral/async_mistral.py +423 -250
  56. openlit/instrumentation/mistral/mistral.py +420 -246
  57. openlit/instrumentation/multion/async_multion.py +6 -4
  58. openlit/instrumentation/multion/multion.py +6 -4
  59. openlit/instrumentation/ollama/__init__.py +8 -30
  60. openlit/instrumentation/ollama/async_ollama.py +385 -417
  61. openlit/instrumentation/ollama/ollama.py +384 -417
  62. openlit/instrumentation/openai/__init__.py +11 -230
  63. openlit/instrumentation/openai/async_openai.py +433 -410
  64. openlit/instrumentation/openai/openai.py +414 -394
  65. openlit/instrumentation/phidata/phidata.py +6 -4
  66. openlit/instrumentation/pinecone/pinecone.py +9 -9
  67. openlit/instrumentation/premai/__init__.py +2 -2
  68. openlit/instrumentation/premai/premai.py +262 -213
  69. openlit/instrumentation/qdrant/async_qdrant.py +9 -9
  70. openlit/instrumentation/qdrant/qdrant.py +9 -9
  71. openlit/instrumentation/reka/__init__.py +2 -2
  72. openlit/instrumentation/reka/async_reka.py +90 -52
  73. openlit/instrumentation/reka/reka.py +90 -52
  74. openlit/instrumentation/together/__init__.py +4 -4
  75. openlit/instrumentation/together/async_together.py +278 -236
  76. openlit/instrumentation/together/together.py +278 -236
  77. openlit/instrumentation/transformers/__init__.py +1 -1
  78. openlit/instrumentation/transformers/transformers.py +76 -45
  79. openlit/instrumentation/vertexai/__init__.py +14 -64
  80. openlit/instrumentation/vertexai/async_vertexai.py +330 -987
  81. openlit/instrumentation/vertexai/vertexai.py +330 -987
  82. openlit/instrumentation/vllm/__init__.py +1 -1
  83. openlit/instrumentation/vllm/vllm.py +66 -36
  84. openlit/otel/metrics.py +98 -7
  85. openlit/semcov/__init__.py +113 -80
  86. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
  87. openlit-1.33.10.dist-info/RECORD +122 -0
  88. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/WHEEL +1 -1
  89. openlit/instrumentation/openai/async_azure_openai.py +0 -900
  90. openlit/instrumentation/openai/azure_openai.py +0 -898
  91. openlit-1.33.8.dist-info/RECORD +0 -122
  92. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
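
The remainder of this page shows one file's diff in full. Judging by the module docstring and the wrapper names (chat_completions, embedding, image_generate, image_variatons, audio_create), it appears to be openlit/instrumentation/openai/openai.py (entry 64, +414 -394). For orientation, a minimal sketch of a setup whose telemetry flows through these wrappers; openlit.init() with application_name and environment is the SDK's documented entry point, while the model and prompt below are placeholders:

import openlit
from openai import OpenAI

# openlit.init wires up the tracer and meter that the wrappers in this diff
# receive as `tracer` and `metrics`; application_name and environment become
# the SERVICE_NAME / DEPLOYMENT_ENVIRONMENT span attributes set below.
openlit.init(application_name="demo-app", environment="staging")

client = OpenAI()  # instrumentation patches client.chat.completions.create
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
)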
@@ -1,11 +1,11 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches
 """
 Module for monitoring OpenAI API calls.
 """

 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
     get_chat_model_cost,
     get_embed_model_cost,
@@ -14,19 +14,22 @@ from openlit.__helpers import (
     openai_tokens,
     handle_exception,
     response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
 )
 from openlit.semcov import SemanticConvetion

 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)

-def chat_completions(gen_ai_endpoint, version, environment, application_name,
+def chat_completions(version, environment, application_name,
                      tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -41,7 +44,7 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
     class TracedSyncStream:
         """
         Wrapper for streaming responses to collect metrics and trace data.
-        Wraps the 'openai.AsyncStream' response to collect message IDs and aggregated response.
+        Wraps the response to collect message IDs and aggregated response.

         This class implements the '__aiter__' and '__anext__' methods that
         handle asynchronous streaming responses.
@@ -54,6 +57,8 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                 wrapped,
                 span,
                 kwargs,
+                server_address,
+                server_port,
                 **args,
             ):
             self.__wrapped__ = wrapped
@@ -61,9 +66,20 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
             # Placeholder for aggregating streaming response
             self._llmresponse = ""
             self._response_id = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._openai_response_service_tier = ""
+            self._openai_system_fingerprint = ""

             self._args = args
             self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port

         def __enter__(self):
             self.__wrapped__.__enter__()
@@ -82,6 +98,14 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
         def __next__(self):
             try:
                 chunk = self.__wrapped__.__next__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
                 chunked = response_as_dict(chunk)
                 # Collect message IDs and aggregated response from events
                 if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
@@ -91,10 +115,18 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                     if content:
                         self._llmresponse += content
                 self._response_id = chunked.get('id')
+                self._response_model = chunked.get('model')
+                self._finish_reason = chunked.get('choices')[0].get('finish_reason')
+                self._openai_response_service_tier = chunked.get('service_tier')
+                self._openai_system_fingerprint = chunked.get('system_fingerprint')
                 return chunk
             except StopIteration:
                 # Handling exception ensure observability without disrupting operation
                 try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                     # Format 'messages' into a single string
                     message_prompt = self._kwargs.get("messages", "")
                     formatted_messages = []
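
The streaming wrapper now timestamps every chunk: the first timestamp yields time-to-first-token (TTFT), and once the stream ends the full list yields time-between-tokens (TBT). Both helpers are imported from openlit/__helpers.py, whose +88 lines are not shown on this page; a minimal sketch of what they plausibly compute, assuming TTFT is the gap from request start to the first chunk and TBT the mean inter-chunk gap:

# Hypothetical reconstruction; the real helpers live in openlit/__helpers.py.
def calculate_ttft(timestamps, start_time):
    # Time to first token: delay from issuing the request to the first chunk.
    return timestamps[0] - start_time if timestamps else 0

def calculate_tbt(timestamps):
    # Time between tokens: average gap between consecutive streamed chunks.
    if len(timestamps) < 2:
        return 0
    gaps = [b - a for a, b in zip(timestamps, timestamps[1:])]
    return sum(gaps) / len(gaps)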
@@ -109,7 +141,6 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                                     content_str_list.append(f'text: {item["text"]}')
                                 elif (item["type"] == "image_url" and
                                       not item["image_url"]["url"].startswith("data:")):
-                                    # pylint: disable=line-too-long
                                     content_str_list.append(f'image_url: {item["image_url"]["url"]}')
                             content_str = ", ".join(content_str_list)
                             formatted_messages.append(f"{role}: {content_str}")
@@ -117,57 +148,87 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)

+                    request_model = self._kwargs.get("model", "gpt-4o")
+
                     # Calculate tokens using input prompt and aggregated response
-                    prompt_tokens = openai_tokens(prompt,
-                                                  self._kwargs.get("model", "gpt-3.5-turbo"))
-                    completion_tokens = openai_tokens(self._llmresponse,
-                                                      self._kwargs.get("model", "gpt-3.5-turbo"))
+                    input_tokens = openai_tokens(prompt,
+                                                 request_model)
+                    output_tokens = openai_tokens(self._llmresponse,
+                                                  request_model)

                     # Calculate cost of the operation
-                    cost = get_chat_model_cost(self._kwargs.get("model", "gpt-3.5-turbo"),
-                                               pricing_info, prompt_tokens,
-                                               completion_tokens)
+                    cost = get_chat_model_cost(request_model,
+                                               pricing_info, input_tokens,
+                                               output_tokens)

-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                             SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                              SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                             SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                             gen_ai_endpoint)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                             request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                             self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                             self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                             self._kwargs.get("frequency_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                             self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                             self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                             self._kwargs.get("stop", []))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                             self._kwargs.get("temperature", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                             self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                             [self._finish_reason])
                     self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                              self._response_id)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                             self._response_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                             input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                             output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                             self._server_address)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SERVICE_TIER,
+                                             self._kwargs.get("service_tier", "auto"))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SERVICE_TIER,
+                                             self._openai_response_service_tier)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
+                                             self._openai_system_fingerprint)
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                                 "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                              environment)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    self._span.set_attribute(SERVICE_NAME,
                                              application_name)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                             self._kwargs.get("model", "gpt-3.5-turbo"))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                              self._kwargs.get("user", ""))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                             self._kwargs.get("top_p", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                             self._kwargs.get("max_tokens", -1))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                             self._kwargs.get("temperature", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                             self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                             self._kwargs.get("frequency_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                             self._kwargs.get("seed", ""))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                              True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                             prompt_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                             completion_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                             prompt_tokens + completion_tokens)
+                                             input_tokens + output_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                              cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                                             self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                             self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                             version)
                     if trace_content:
                         self._span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
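
A streamed response carries no usage block, so the wrapper keeps counting tokens client-side via openai_tokens, now driven by a single request_model variable (note the fallback model moves from gpt-3.5-turbo to gpt-4o). A sketch of an openai_tokens-style counter, assuming the tiktoken tokenizer that OpenAI models use; the real helper is in openlit/__helpers.py:

import tiktoken

def openai_tokens(text, model):
    # Resolve the tokenizer for the model, falling back to a common encoding
    # for model names tiktoken does not recognize.
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(text))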
@@ -181,31 +242,35 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                             },
                         )
-
                     self._span.set_status(Status(StatusCode.OK))

                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                self._kwargs.get("model", "gpt-3.5-turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=self._response_model,
+                        )

-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(
-                            prompt_tokens + completion_tokens, attributes
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
+                        )
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
                         )
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)

                 except Exception as e:
@@ -234,20 +299,25 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,

         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "gpt-4o")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

         # pylint: disable=no-else-return
         if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
             awaited_wrapped = wrapped(*args, **kwargs)
-            span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)

-            return TracedSyncStream(awaited_wrapped, span, kwargs)
+            return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)

         # Handling for non-streaming responses
         else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+            with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+                start_time = time.time()
                 response = wrapped(*args, **kwargs)
+                end_time = time.time()

                 response_dict = response_as_dict(response)

@@ -261,7 +331,6 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,

                     if isinstance(content, list):
                         content_str = ", ".join(
-                            # pylint: disable=line-too-long
                             f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                             if "type" in item else f'text: {item["text"]}'
                             for item in content
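
Two structural changes landed a hunk earlier: spans are no longer named after the removed gen_ai_endpoint argument but follow the OTel GenAI "{operation} {model}" convention (likely rendering as "chat gpt-4o"), and the server address/port are resolved from the patched client instance, with api.openai.com:443 as the fallback. set_server_address_and_port itself lives in the unshown openlit/__helpers.py; a hypothetical sketch, assuming the OpenAI SDK client exposes a base_url:

from urllib.parse import urlparse

def set_server_address_and_port(instance, default_address, default_port):
    # Assumption: the resource object passed as `instance` hangs off an OpenAI
    # client that carries the configured base_url (Azure endpoints, proxies, ...).
    client = getattr(instance, "_client", None)
    base_url = str(getattr(client, "base_url", "") or "")
    if base_url:
        parsed = urlparse(base_url)
        if parsed.hostname:
            return parsed.hostname, parsed.port or default_port
    return default_address, default_port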
@@ -271,38 +340,72 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                         formatted_messages.append(f"{role}: {content}")
                 prompt = "\n".join(formatted_messages)

-                # Set base span attribues
+                input_tokens = response_dict.get('usage').get('prompt_tokens')
+                output_tokens = response_dict.get('usage').get('completion_tokens')
+
+                # Calculate cost of the operation
+                cost = get_chat_model_cost(request_model,
+                                           pricing_info, input_tokens,
+                                           output_tokens)
+
+                # Set base span attribues (OTel Semconv)
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                    SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_CHAT)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                   kwargs.get("seed", ""))
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                   kwargs.get("frequency_penalty", 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                                   kwargs.get("max_tokens", -1))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                   kwargs.get("presence_penalty", 0.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                                   kwargs.get("stop", []))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                   kwargs.get("temperature", 1.0))
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                                   kwargs.get("top_p", 1.0))
                 span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                    response_dict.get("id"))
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   response_dict.get('model'))
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                   input_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                                   output_tokens)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SERVICE_TIER,
+                                   kwargs.get("service_tier", "auto"))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SERVICE_TIER,
+                                   response_dict.get('service_tier'))
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
+                                   response_dict.get('system_fingerprint'))
+
+                # Set base span attribues (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "gpt-3.5-turbo"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                                   kwargs.get("top_p", 1.0))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                                   kwargs.get("max_tokens", -1))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                    kwargs.get("user", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                                   kwargs.get("temperature", 1.0))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                                   kwargs.get("presence_penalty", 0.0))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                                   kwargs.get("frequency_penalty", 0.0))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                                   kwargs.get("seed", ""))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                    False)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                   input_tokens + output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                   cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                                   end_time - start_time)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                   version)
                 if trace_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -311,93 +414,54 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
                         },
                     )

-                # Set span attributes when tools is not passed to the function call
-                if "tools" not in kwargs:
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                               pricing_info, response_dict.get('usage', {}).get('prompt_tokens', None),
-                                               response_dict.get('usage', {}).get('completion_tokens', None))
-
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                       response_dict.get('usage', {}).get('prompt_tokens', None))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                       response_dict.get('usage', {}).get('completion_tokens', None))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       response_dict.get('usage', {}).get('total_tokens', None))
+                for i in range(kwargs.get('n',1)):
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                                       [response_dict.get('choices', [])[0].get('finish_reason', None)])
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                       cost)
-
-                    # Set span attributes for when n = 1 (default)
-                    if "n" not in kwargs or kwargs["n"] == 1:
-                        if trace_content:
-                            span.add_event(
-                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices', [])[0].get("message").get("content"),
-                                },
-                            )
-
-                    # Set span attributes for when n > 0
-                    else:
-                        i = 0
-                        while i < kwargs["n"] and trace_content is True:
-                            attribute_name = f"gen_ai.content.completion.{i}"
-                            span.add_event(
-                                name=attribute_name,
-                                attributes={
-                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                                },
-                            )
-                            i += 1
-
-                    # Return original response
-                    return response
-
-                # Set span attributes when tools is passed to the function call
-                elif "tools" in kwargs:
-                    # Calculate cost of the operation
-                    cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                                               pricing_info, response_dict.get('usage').get('prompt_tokens'),
-                                               response_dict.get('usage').get('completion_tokens'))
-                    span.add_event(
-                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                        attributes={
-                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
-                        },
-                    )
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                       response_dict.get('usage').get('prompt_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                                       response_dict.get('usage').get('completion_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                       response_dict.get('usage').get('total_tokens'))
-                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                                       cost)
+                                       [response_dict.get('choices')[i].get('finish_reason')])
+                    if trace_content:
+                        span.add_event(
+                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                            attributes={
+                                # pylint: disable=line-too-long
+                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+                            },
+                        )
+                    if kwargs.get('tools'):
+                        span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                           str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+                    if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                           "text")
+                    elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                        span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                           "json")

                 span.set_status(Status(StatusCode.OK))

                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_CHAT,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "gpt-3.5-turbo")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=response_dict.get('model'),
+                    )

+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens + output_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
+                    metrics["genai_server_ttft"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(response_dict.get('usage').get('total_tokens'), attributes)
-                    metrics["genai_completion_tokens"].add(response_dict.get('usage').get('completion_tokens'), attributes)
-                    metrics["genai_prompt_tokens"].add(response_dict.get('usage').get('prompt_tokens'), attributes)
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                     metrics["genai_cost"].record(cost, attributes)

                 # Return original response
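
Every wrapper swaps its hand-rolled metric attribute dict for a shared create_metrics_attributes helper, and the non-streaming path gains duration and token histograms (genai_client_operation_duration, genai_client_usage_tokens, genai_server_ttft) next to the existing counters. The helper is part of the unshown openlit/__helpers.py changes; a plausible sketch, with keys named after the OpenTelemetry GenAI conventions the new span attributes also follow:

def create_metrics_attributes(service_name, deployment_environment, operation,
                              system, request_model, server_address, server_port,
                              response_model):
    # Assumed attribute keys; the real helper presumably reuses the
    # SemanticConvetion constants instead of string literals.
    return {
        "telemetry.sdk.name": "openlit",
        "service.name": service_name,
        "deployment.environment": deployment_environment,
        "gen_ai.operation.name": operation,
        "gen_ai.system": system,
        "gen_ai.request.model": request_model,
        "server.address": server_address,
        "server.port": server_port,
        "gen_ai.response.model": response_model,
    }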
@@ -412,13 +476,12 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,

     return wrapper

-def embedding(gen_ai_endpoint, version, environment, application_name,
+def embedding(version, environment, application_name,
               tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for embeddings to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -447,40 +510,56 @@ def embedding(gen_ai_endpoint, version, environment, application_name,
            The response from the original 'embeddings' method.
        """

-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "text-embedding-ada-002")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
+            end_time = time.time()
+
             response_dict = response_as_dict(response)
             try:
+                input_tokens = response_dict.get('usage').get('prompt_tokens')
+
                 # Calculate cost of the operation
-                cost = get_embed_model_cost(kwargs.get("model", "text-embedding-ada-002"),
-                                            pricing_info, response_dict.get('usage').get('prompt_tokens'))
+                cost = get_embed_model_cost(request_model,
+                                            pricing_info, input_tokens)

-                # Set Span attributes
+                # Set Span attributes (OTel Semconv)
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                    SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
+                                   [kwargs.get('encoding_format', 'float')])
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                                   input_tokens)
+
+                # Set Span attributes (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "text-embedding-ada-002"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
-                                   kwargs.get("encoding_format", "float"))
-                # span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
-                #                    kwargs.get("dimensions", "null"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                    kwargs.get("user", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                   response_dict.get('usage').get('prompt_tokens'))
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                                   response_dict.get('usage').get('total_tokens'))
+                                   input_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                   version)

                 if trace_content:
                     span.add_event(
@@ -493,26 +572,24 @@ def embedding(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))

                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "text-embedding-ada-002")
-                    }
-
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(
-                        response_dict.get('usage').get('total_tokens'), attributes)
-                    metrics["genai_prompt_tokens"].add(
-                        response_dict.get('usage').get('prompt_tokens'), attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                     metrics["genai_cost"].record(cost, attributes)

                 # Return original response
@@ -527,118 +604,12 @@ def embedding(gen_ai_endpoint, version, environment, application_name,

     return wrapper

-def finetune(gen_ai_endpoint, version, environment, application_name,
-             tracer, pricing_info, trace_content, metrics, disable_metrics):
-    """
-    Generates a telemetry wrapper for fine-tuning jobs to collect metrics.
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the OpenAI API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of OpenAI usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the fine tuning creation method to add telemetry.
-    """
-
-    def wrapper(wrapped, instance, args, kwargs):
-        """
-        Wraps the 'fine_tuning.jobs.create' API call to add telemetry.
-
-        This collects metrics such as execution time, usage stats, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'fine_tuning.jobs.create' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the method.
-            kwargs: Keyword arguments for the method.
-
-        Returns:
-            The response from the original 'fine_tuning.jobs.create' method.
-        """
-
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-            response = wrapped(*args, **kwargs)
-
-            # Handling exception ensure observability without disrupting operation
-            try:
-                # Set Span attributes
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                                   SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_FINETUNING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                                   environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                                   application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "gpt-3.5-turbo"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TRAINING_FILE,
-                                   kwargs.get("training_file", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_VALIDATION_FILE,
-                                   kwargs.get("validation_file", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_BATCH_SIZE,
-                                   kwargs.get("hyperparameters.batch_size", "auto"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_LRM,
-                                   kwargs.get("hyperparameters.learning_rate_multiplier",
-                                              "auto"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_EPOCHS,
-                                   kwargs.get("hyperparameters.n_epochs", "auto"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_SUFFIX,
-                                   kwargs.get("suffix", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                                   response.id)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                                   response.usage.prompt_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_STATUS,
-                                   response.status)
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_FINETUNING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "gpt-3.5-turbo")
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-
-                # Return original response
-                return response
-
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
-
-                # Return original response
-                return response
-
-    return wrapper
-
-def image_generate(gen_ai_endpoint, version, environment, application_name,
+def image_generate(version, environment, application_name,
                    tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for image generation to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -667,8 +638,16 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
            The response from the original 'images.generate' method.
        """

-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "dall-e-2")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
+            end_time = time.time()
+
             images_count = 0

             try:
@@ -679,27 +658,35 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                     image = "url"

                 # Calculate cost of the operation
-                cost = get_image_model_cost(kwargs.get("model", "dall-e-2"),
+                cost = get_image_model_cost(request_model,
                                             pricing_info, kwargs.get("size", "1024x1024"),
                                             kwargs.get("quality", "standard"))

                 for items in response.data:
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                        SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_IMAGE)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                        response.created)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                       "image")
+
+                    # Set Span attributes (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    span.set_attribute(SERVICE_NAME,
                                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "dall-e-2"))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
                                        kwargs.get("size", "1024x1024"))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
@@ -710,6 +697,9 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                                        items.revised_prompt if items.revised_prompt else "")
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                        kwargs.get("user", ""))
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
+
                     if trace_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -717,7 +707,7 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
                             },
                         )
-                    attribute_name = f"gen_ai.response.image.{images_count}"
+                    attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
                     span.add_event(
                         name=attribute_name,
                         attributes={
@@ -732,21 +722,20 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))

                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_IMAGE,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "dall-e-2")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )

+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
                     metrics["genai_cost"].record(cost, attributes)

@@ -762,13 +751,12 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,

     return wrapper

-def image_variatons(gen_ai_endpoint, version, environment, application_name,
+def image_variatons(version, environment, application_name,
                     tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for creating image variations to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -797,8 +785,16 @@ def image_variatons(gen_ai_endpoint, version, environment, application_name,
            The response from the original 'images.create.variations' method.
        """

-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "dall-e-2")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
+            end_time = time.time()
+
             images_count = 0

             try:
@@ -809,34 +805,45 @@ def image_variatons(gen_ai_endpoint, version, environment, application_name,
                     image = "url"

                 # Calculate cost of the operation
-                cost = get_image_model_cost(kwargs.get("model", "dall-e-2"), pricing_info,
+                cost = get_image_model_cost(request_model, pricing_info,
                                             kwargs.get("size", "1024x1024"), "standard")

                 for items in response.data:
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                       SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                        SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                       SemanticConvetion.GEN_AI_TYPE_IMAGE)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                       gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                       server_address)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                       server_port)
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                        response.created)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                       request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                       "image")
+
+                    # Set Span attributes (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                        environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    span.set_attribute(SERVICE_NAME,
                                        application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                       kwargs.get("model", "dall-e-2"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
-                                       kwargs.get("user", ""))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
                                        kwargs.get("size", "1024x1024"))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
                                        "standard")
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                       kwargs.get("user", ""))
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                       version)
+
                     if trace_content:
-                        attribute_name = f"gen_ai.response.image.{images_count}"
+                        attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
                         span.add_event(
                             name=attribute_name,
                             attributes={
@@ -851,21 +858,20 @@ def image_variatons(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))

                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_IMAGE,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "dall-e-2")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )

+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
                     metrics["genai_cost"].record(cost, attributes)

@@ -881,13 +887,12 @@ def image_variatons(gen_ai_endpoint, version, environment, application_name,

     return wrapper

-def audio_create(gen_ai_endpoint, version, environment, application_name,
+def audio_create(version, environment, application_name,
                  tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for creating speech audio to collect metrics.

     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
@@ -916,28 +921,42 @@ def audio_create(gen_ai_endpoint, version, environment, application_name,
            The response from the original 'audio.speech.create' method.
        """

-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "tts-1")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = wrapped(*args, **kwargs)
+            end_time = time.time()

             try:
                 # Calculate cost of the operation
-                cost = get_audio_model_cost(kwargs.get("model", "tts-1"),
+                cost = get_audio_model_cost(request_model,
                                             pricing_info, kwargs.get("input", ""))

                 # Set Span attributes
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                                   SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO)
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                    SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                                   SemanticConvetion.GEN_AI_TYPE_AUDIO)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                                   gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                                   server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                                   server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                                   request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                   "speech")
+
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                                   kwargs.get("model", "tts-1"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_AUDIO_VOICE,
                                    kwargs.get("voice", "alloy"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_AUDIO_RESPONSE_FORMAT,
@@ -946,6 +965,8 @@ def audio_create(gen_ai_endpoint, version, environment, application_name,
                                    kwargs.get("speed", 1))
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                                   version)
                 if trace_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -957,21 +978,20 @@ def audio_create(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))

                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_AUDIO,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "tts-1")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )

+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
                     metrics["genai_cost"].record(cost, attributes)
