openlit 1.33.8__py3-none-any.whl → 1.33.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. openlit/__helpers.py +88 -0
  2. openlit/__init__.py +4 -3
  3. openlit/instrumentation/ag2/ag2.py +5 -5
  4. openlit/instrumentation/ai21/__init__.py +4 -4
  5. openlit/instrumentation/ai21/ai21.py +370 -319
  6. openlit/instrumentation/ai21/async_ai21.py +371 -319
  7. openlit/instrumentation/anthropic/__init__.py +4 -4
  8. openlit/instrumentation/anthropic/anthropic.py +321 -189
  9. openlit/instrumentation/anthropic/async_anthropic.py +323 -190
  10. openlit/instrumentation/assemblyai/__init__.py +1 -1
  11. openlit/instrumentation/assemblyai/assemblyai.py +59 -43
  12. openlit/instrumentation/astra/astra.py +9 -9
  13. openlit/instrumentation/astra/async_astra.py +9 -9
  14. openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
  15. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
  16. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
  17. openlit/instrumentation/bedrock/__init__.py +1 -1
  18. openlit/instrumentation/bedrock/bedrock.py +115 -58
  19. openlit/instrumentation/chroma/chroma.py +9 -9
  20. openlit/instrumentation/cohere/__init__.py +33 -10
  21. openlit/instrumentation/cohere/async_cohere.py +610 -0
  22. openlit/instrumentation/cohere/cohere.py +410 -219
  23. openlit/instrumentation/controlflow/controlflow.py +5 -5
  24. openlit/instrumentation/crawl4ai/async_crawl4ai.py +5 -5
  25. openlit/instrumentation/crawl4ai/crawl4ai.py +5 -5
  26. openlit/instrumentation/crewai/crewai.py +6 -4
  27. openlit/instrumentation/dynamiq/dynamiq.py +5 -5
  28. openlit/instrumentation/elevenlabs/async_elevenlabs.py +71 -46
  29. openlit/instrumentation/elevenlabs/elevenlabs.py +71 -51
  30. openlit/instrumentation/embedchain/embedchain.py +9 -9
  31. openlit/instrumentation/firecrawl/firecrawl.py +5 -5
  32. openlit/instrumentation/google_ai_studio/__init__.py +9 -9
  33. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
  34. openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
  35. openlit/instrumentation/gpt4all/__init__.py +2 -2
  36. openlit/instrumentation/gpt4all/gpt4all.py +345 -220
  37. openlit/instrumentation/gpu/__init__.py +5 -5
  38. openlit/instrumentation/groq/__init__.py +2 -2
  39. openlit/instrumentation/groq/async_groq.py +356 -240
  40. openlit/instrumentation/groq/groq.py +356 -240
  41. openlit/instrumentation/haystack/haystack.py +5 -5
  42. openlit/instrumentation/julep/async_julep.py +5 -5
  43. openlit/instrumentation/julep/julep.py +5 -5
  44. openlit/instrumentation/langchain/__init__.py +13 -7
  45. openlit/instrumentation/langchain/async_langchain.py +384 -0
  46. openlit/instrumentation/langchain/langchain.py +105 -492
  47. openlit/instrumentation/letta/letta.py +11 -9
  48. openlit/instrumentation/litellm/__init__.py +4 -5
  49. openlit/instrumentation/litellm/async_litellm.py +318 -247
  50. openlit/instrumentation/litellm/litellm.py +314 -243
  51. openlit/instrumentation/llamaindex/llamaindex.py +5 -5
  52. openlit/instrumentation/mem0/mem0.py +5 -5
  53. openlit/instrumentation/milvus/milvus.py +9 -9
  54. openlit/instrumentation/mistral/__init__.py +6 -6
  55. openlit/instrumentation/mistral/async_mistral.py +423 -250
  56. openlit/instrumentation/mistral/mistral.py +420 -246
  57. openlit/instrumentation/multion/async_multion.py +6 -4
  58. openlit/instrumentation/multion/multion.py +6 -4
  59. openlit/instrumentation/ollama/__init__.py +8 -30
  60. openlit/instrumentation/ollama/async_ollama.py +385 -417
  61. openlit/instrumentation/ollama/ollama.py +384 -417
  62. openlit/instrumentation/openai/__init__.py +11 -230
  63. openlit/instrumentation/openai/async_openai.py +433 -410
  64. openlit/instrumentation/openai/openai.py +414 -394
  65. openlit/instrumentation/phidata/phidata.py +6 -4
  66. openlit/instrumentation/pinecone/pinecone.py +9 -9
  67. openlit/instrumentation/premai/__init__.py +2 -2
  68. openlit/instrumentation/premai/premai.py +262 -213
  69. openlit/instrumentation/qdrant/async_qdrant.py +9 -9
  70. openlit/instrumentation/qdrant/qdrant.py +9 -9
  71. openlit/instrumentation/reka/__init__.py +2 -2
  72. openlit/instrumentation/reka/async_reka.py +90 -52
  73. openlit/instrumentation/reka/reka.py +90 -52
  74. openlit/instrumentation/together/__init__.py +4 -4
  75. openlit/instrumentation/together/async_together.py +278 -236
  76. openlit/instrumentation/together/together.py +278 -236
  77. openlit/instrumentation/transformers/__init__.py +1 -1
  78. openlit/instrumentation/transformers/transformers.py +76 -45
  79. openlit/instrumentation/vertexai/__init__.py +14 -64
  80. openlit/instrumentation/vertexai/async_vertexai.py +330 -987
  81. openlit/instrumentation/vertexai/vertexai.py +330 -987
  82. openlit/instrumentation/vllm/__init__.py +1 -1
  83. openlit/instrumentation/vllm/vllm.py +66 -36
  84. openlit/otel/metrics.py +98 -7
  85. openlit/semcov/__init__.py +113 -80
  86. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/METADATA +1 -1
  87. openlit-1.33.10.dist-info/RECORD +122 -0
  88. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/WHEEL +1 -1
  89. openlit/instrumentation/openai/async_azure_openai.py +0 -900
  90. openlit/instrumentation/openai/azure_openai.py +0 -898
  91. openlit-1.33.8.dist-info/RECORD +0 -122
  92. {openlit-1.33.8.dist-info → openlit-1.33.10.dist-info}/LICENSE +0 -0
@@ -1,11 +1,11 @@
-# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches
 """
 Module for monitoring OpenAI API calls.
 """
 
 import logging
+import time
 from opentelemetry.trace import SpanKind, Status, StatusCode
-from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from openlit.__helpers import (
     get_chat_model_cost,
     get_embed_model_cost,
@@ -14,19 +14,22 @@ from openlit.__helpers import (
     openai_tokens,
     handle_exception,
     response_as_dict,
+    calculate_ttft,
+    calculate_tbt,
+    create_metrics_attributes,
+    set_server_address_and_port
 )
 from openlit.semcov import SemanticConvetion
 
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
+def async_chat_completions(version, environment, application_name,
                            tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for chat completions to collect metrics.
 
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
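Note: the hunks in this section are from openlit/instrumentation/openai/async_openai.py (entry 63 in the list above, judging by the function names and module docstring). The newly imported helpers (`calculate_ttft`, `calculate_tbt`, `create_metrics_attributes`, `set_server_address_and_port`) live in openlit/__helpers.py, the +88-line entry at the top of the file list; this diff only shows their call sites. As orientation, the call sites imply shapes roughly like the following — a hedged sketch, not the actual openlit implementation:

```python
from typing import List

def calculate_ttft(timestamps: List[float], start_time: float) -> float:
    # Time to first token: delay between request start and the first chunk
    if timestamps:
        return timestamps[0] - start_time
    return 0.0

def calculate_tbt(timestamps: List[float]) -> float:
    # Average gap between consecutive chunks; needs at least two chunks
    if len(timestamps) > 1:
        gaps = [later - earlier for earlier, later in zip(timestamps, timestamps[1:])]
        return sum(gaps) / len(gaps)
    return 0.0
```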
@@ -41,7 +44,7 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
     class TracedAsyncStream:
         """
         Wrapper for streaming responses to collect metrics and trace data.
-        Wraps the 'openai.AsyncStream' response to collect message IDs and aggregated response.
+        Wraps the response to collect message IDs and aggregated response.
 
         This class implements the '__aiter__' and '__anext__' methods that
         handle asynchronous streaming responses.
@@ -54,6 +57,8 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
             wrapped,
             span,
             kwargs,
+            server_address,
+            server_port,
             **args,
         ):
             self.__wrapped__ = wrapped
@@ -61,9 +66,20 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
             # Placeholder for aggregating streaming response
             self._llmresponse = ""
             self._response_id = ""
+            self._response_model = ""
+            self._finish_reason = ""
+            self._openai_response_service_tier = ""
+            self._openai_system_fingerprint = ""
 
             self._args = args
             self._kwargs = kwargs
+            self._start_time = time.time()
+            self._end_time = None
+            self._timestamps = []
+            self._ttft = 0
+            self._tbt = 0
+            self._server_address = server_address
+            self._server_port = server_port
 
         async def __aenter__(self):
             await self.__wrapped__.__aenter__()
@@ -82,6 +98,14 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
         async def __anext__(self):
             try:
                 chunk = await self.__wrapped__.__anext__()
+                end_time = time.time()
+                # Record the timestamp for the current chunk
+                self._timestamps.append(end_time)
+
+                if len(self._timestamps) == 1:
+                    # Calculate time to first chunk
+                    self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
                 chunked = response_as_dict(chunk)
                 # Collect message IDs and aggregated response from events
                 if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
@@ -91,10 +115,18 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
                     if content:
                         self._llmresponse += content
                     self._response_id = chunked.get('id')
+                    self._response_model = chunked.get('model')
+                    self._finish_reason = chunked.get('choices')[0].get('finish_reason')
+                    self._openai_response_service_tier = chunked.get('service_tier')
+                    self._openai_system_fingerprint = chunked.get('system_fingerprint')
                 return chunk
             except StopAsyncIteration:
                 # Handling exception ensure observability without disrupting operation
                 try:
+                    self._end_time = time.time()
+                    if len(self._timestamps) > 1:
+                        self._tbt = calculate_tbt(self._timestamps)
+
                    # Format 'messages' into a single string
                     message_prompt = self._kwargs.get("messages", "")
                     formatted_messages = []
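The streaming hunks above follow one pattern: `__anext__` timestamps every chunk, computes TTFT once the first chunk lands, and computes TBT when the stream is exhausted. Reduced to its bones and stripped of the OpenAI-specific aggregation, the pattern looks like this standalone illustration (not openlit's exact class):

```python
import time

class TimedAsyncStream:
    """Wrap an async iterator and record per-chunk arrival times."""

    def __init__(self, wrapped):
        self.__wrapped__ = wrapped
        self._start_time = time.time()
        self._timestamps = []
        self._ttft = 0.0
        self._tbt = 0.0

    def __aiter__(self):
        return self

    async def __anext__(self):
        try:
            chunk = await self.__wrapped__.__anext__()
        except StopAsyncIteration:
            # Stream exhausted: average gap between chunks (time between tokens)
            if len(self._timestamps) > 1:
                gaps = [b - a for a, b in zip(self._timestamps, self._timestamps[1:])]
                self._tbt = sum(gaps) / len(gaps)
            raise
        self._timestamps.append(time.time())
        if len(self._timestamps) == 1:
            # First chunk: time to first token
            self._ttft = self._timestamps[0] - self._start_time
        return chunk
```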
@@ -103,68 +135,100 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
                         content = message["content"]
 
                         if isinstance(content, list):
-                            content_str = ", ".join(
-                                # pylint: disable=line-too-long
-                                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
-                                if "type" in item else f'text: {item["text"]}'
-                                for item in content
-                            )
+                            content_str_list = []
+                            for item in content:
+                                if item["type"] == "text":
+                                    content_str_list.append(f'text: {item["text"]}')
+                                elif (item["type"] == "image_url" and
+                                      not item["image_url"]["url"].startswith("data:")):
+                                    content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+                            content_str = ", ".join(content_str_list)
                             formatted_messages.append(f"{role}: {content_str}")
                         else:
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
+                    request_model = self._kwargs.get("model", "gpt-4o")
+
                     # Calculate tokens using input prompt and aggregated response
-                    prompt_tokens = openai_tokens(prompt,
-                        self._kwargs.get("model", "gpt-3.5-turbo"))
-                    completion_tokens = openai_tokens(self._llmresponse,
-                        self._kwargs.get("model", "gpt-3.5-turbo"))
+                    input_tokens = openai_tokens(prompt,
+                        request_model)
+                    output_tokens = openai_tokens(self._llmresponse,
+                        request_model)
 
                     # Calculate cost of the operation
-                    cost = get_chat_model_cost(self._kwargs.get("model", "gpt-3.5-turbo"),
-                        pricing_info, prompt_tokens,
-                        completion_tokens)
+                    cost = get_chat_model_cost(request_model,
+                        pricing_info, input_tokens,
+                        output_tokens)
 
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                        SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                         SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                        SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                        gen_ai_endpoint)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                        request_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                        self._kwargs.get("seed", ""))
+                    self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+                        self._server_port)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                        self._kwargs.get("frequency_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                        self._kwargs.get("max_tokens", -1))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                        self._kwargs.get("presence_penalty", 0.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                        self._kwargs.get("stop", []))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                        self._kwargs.get("temperature", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                        self._kwargs.get("top_p", 1.0))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                        [self._finish_reason])
                     self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                         self._response_id)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                        self._response_model)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                        input_tokens)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                        output_tokens)
+                    self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                        self._server_address)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SERVICE_TIER,
+                        self._kwargs.get("service_tier", "auto"))
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SERVICE_TIER,
+                        self._openai_response_service_tier)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
+                        self._openai_system_fingerprint)
+                    if isinstance(self._llmresponse, str):
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                            "text")
+                    else:
+                        self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                            "json")
+
+                    # Set Span attributes (Extra)
+                    self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                         environment)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    self._span.set_attribute(SERVICE_NAME,
                         application_name)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                        self._kwargs.get("model", "gpt-3.5-turbo"))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                         self._kwargs.get("user", ""))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                        self._kwargs.get("top_p", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                        self._kwargs.get("max_tokens", -1))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                        self._kwargs.get("temperature", 1.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                        self._kwargs.get("presence_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                        self._kwargs.get("frequency_penalty", 0.0))
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                        self._kwargs.get("seed", ""))
                     self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                         True)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                        prompt_tokens)
-                    self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                        completion_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                        prompt_tokens + completion_tokens)
+                        input_tokens + output_tokens)
                     self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                         cost)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+                        self._tbt)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                        self._ttft)
+                    self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                        version)
                     if trace_content:
                         self._span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
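The token default also changes here: cost is now computed against `request_model` (defaulting to `gpt-4o` rather than `gpt-3.5-turbo`). `get_chat_model_cost` predates this diff in `__helpers.py` and its lookup isn't shown, so the following is only an illustrative reduction; the key names (`'chat'`, `'promptPrice'`, `'completionPrice'`) are assumptions about the pricing data, not confirmed by this diff:

```python
def get_chat_model_cost(model, pricing_info, input_tokens, output_tokens):
    # Illustrative only: assumes pricing_info maps model names to
    # per-1k-token prompt/completion prices under a 'chat' section.
    try:
        rates = pricing_info['chat'][model]
        return ((input_tokens / 1000) * rates['promptPrice']
                + (output_tokens / 1000) * rates['completionPrice'])
    except (KeyError, TypeError):
        return 0  # unknown model or malformed pricing data
```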
@@ -178,31 +242,35 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
                             },
                         )
-
                     self._span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                self._kwargs.get("model", "gpt-3.5-turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                            request_model=request_model,
+                            server_address=self._server_address,
+                            server_port=self._server_port,
+                            response_model=self._response_model,
+                        )
 
-                        metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(
-                            prompt_tokens + completion_tokens, attributes
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            self._end_time - self._start_time, attributes
                         )
-                        metrics["genai_completion_tokens"].add(completion_tokens, attributes)
-                        metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                        metrics["genai_server_tbt"].record(
+                            self._tbt, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            self._ttft, attributes
+                        )
+                        metrics["genai_requests"].add(1, attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                 except Exception as e:
@@ -231,20 +299,25 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
 
         # Check if streaming is enabled for the API call
         streaming = kwargs.get("stream", False)
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "gpt-4o")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
         # pylint: disable=no-else-return
        if streaming:
             # Special handling for streaming response to accommodate the nature of data flow
             awaited_wrapped = await wrapped(*args, **kwargs)
-            span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
+            span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-            return TracedAsyncStream(awaited_wrapped, span, kwargs)
+            return TracedAsyncStream(awaited_wrapped, span, kwargs, server_address, server_port)
 
         # Handling for non-streaming responses
         else:
-            # pylint: disable=line-too-long
-            with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+            with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+                start_time = time.time()
                 response = await wrapped(*args, **kwargs)
+                end_time = time.time()
 
                 response_dict = response_as_dict(response)
 
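Span names switch here from a static endpoint string to the OTel GenAI convention `{operation} {model}` (e.g. `chat gpt-4o`), and the target host/port now comes from `set_server_address_and_port(instance, default_host, default_port)`. A plausible sketch of that helper follows — the `base_url` lookup path is an assumption about the OpenAI client object, not something this diff shows:

```python
from urllib.parse import urlparse

def set_server_address_and_port(instance, default_address, default_port):
    # Best-effort: pull the configured endpoint off the client, else fall back
    client = getattr(instance, '_client', None)
    base_url = getattr(client, 'base_url', None)
    if base_url:
        parsed = urlparse(str(base_url))
        if parsed.hostname:
            return parsed.hostname, parsed.port or default_port
    return default_address, default_port
```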
@@ -258,7 +331,6 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
 
                         if isinstance(content, list):
                             content_str = ", ".join(
-                                # pylint: disable=line-too-long
                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                                 if "type" in item else f'text: {item["text"]}'
                                 for item in content
@@ -268,38 +340,72 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
                             formatted_messages.append(f"{role}: {content}")
                     prompt = "\n".join(formatted_messages)
 
-                    # Set base span attribues
+                    input_tokens = response_dict.get('usage').get('prompt_tokens')
+                    output_tokens = response_dict.get('usage').get('completion_tokens')
+
+                    # Calculate cost of the operation
+                    cost = get_chat_model_cost(request_model,
+                        pricing_info, input_tokens,
+                        output_tokens)
+
+                    # Set base span attribues (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                        SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                         SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                        SemanticConvetion.GEN_AI_TYPE_CHAT)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                        gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                        request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                        kwargs.get("seed", ""))
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                        server_port)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                        kwargs.get("frequency_penalty", 0.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+                        kwargs.get("max_tokens", -1))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                        kwargs.get("presence_penalty", 0.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+                        kwargs.get("stop", []))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                        kwargs.get("temperature", 1.0))
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+                        kwargs.get("top_p", 1.0))
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                         response_dict.get("id"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                        response_dict.get('model'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                        input_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+                        output_tokens)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                        server_address)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SERVICE_TIER,
+                        kwargs.get("service_tier", "auto"))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SERVICE_TIER,
+                        response_dict.get('service_tier'))
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_SYSTEM_FINGERPRINT,
+                        response_dict.get('system_fingerprint'))
+
+                    # Set base span attribues (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                         environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    span.set_attribute(SERVICE_NAME,
                         application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                        kwargs.get("model", "gpt-3.5-turbo"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
-                        kwargs.get("top_p", 1.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
-                        kwargs.get("max_tokens", -1))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                         kwargs.get("user", ""))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
-                        kwargs.get("temperature", 1.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
-                        kwargs.get("presence_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
-                        kwargs.get("frequency_penalty", 0.0))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
-                        kwargs.get("seed", ""))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                         False)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                        input_tokens + output_tokens)
+                    span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                        cost)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+                        end_time - start_time)
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                        version)
                     if trace_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
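Note that the non-streaming path now takes `input_tokens`/`output_tokens` straight from `response.usage` instead of re-tokenizing client-side, and both the streaming and non-streaming metrics blocks replace the hand-built attribute dict with `create_metrics_attributes`, another new `__helpers.py` function. Judging purely from its keyword arguments and the resource constants imported at the top of the file, it plausibly builds something like the following — the non-resource attribute keys are assumptions; openlit's actual helper may use its `SemanticConvetion` constants:

```python
from opentelemetry.sdk.resources import (
    SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT,
)

def create_metrics_attributes(service_name, deployment_environment, operation,
                              system, request_model, server_address, server_port,
                              response_model):
    # Shared attribute set recorded on every metric data point
    return {
        TELEMETRY_SDK_NAME: 'openlit',
        SERVICE_NAME: service_name,
        DEPLOYMENT_ENVIRONMENT: deployment_environment,
        'gen_ai.operation.name': operation,
        'gen_ai.system': system,
        'gen_ai.request.model': request_model,
        'server.address': server_address,
        'server.port': server_port,
        'gen_ai.response.model': response_model,
    }
```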
@@ -308,93 +414,54 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
                             },
                         )
 
-                    # Set span attributes when tools is not passed to the function call
-                    if "tools" not in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                            pricing_info, response_dict.get('usage', {}).get('prompt_tokens', None),
-                            response_dict.get('usage', {}).get('completion_tokens', None))
-
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                            response_dict.get('usage', {}).get('prompt_tokens', None))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                            response_dict.get('usage', {}).get('completion_tokens', None))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                            response_dict.get('usage', {}).get('total_tokens', None))
+                    for i in range(kwargs.get('n',1)):
                         span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
-                            [response_dict.get('choices', [])[0].get('finish_reason', None)])
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                            cost)
-
-                        # Set span attributes for when n = 1 (default)
-                        if "n" not in kwargs or kwargs["n"] == 1:
-                            if trace_content:
-                                span.add_event(
-                                    name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices', [])[0].get("message").get("content"),
-                                    },
-                                )
-
-                        # Set span attributes for when n > 0
-                        else:
-                            i = 0
-                            while i < kwargs["n"] and trace_content is True:
-                                attribute_name = f"gen_ai.content.completion.{i}"
-                                span.add_event(
-                                    name=attribute_name,
-                                    attributes={
-                                        SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
-                                    },
-                                )
-                                i += 1
-
-                        # Return original response
-                        return response
-
-                    # Set span attributes when tools is passed to the function call
-                    elif "tools" in kwargs:
-                        # Calculate cost of the operation
-                        cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
-                            pricing_info, response_dict.get('usage').get('prompt_tokens'),
-                            response_dict.get('usage').get('completion_tokens'))
-                        span.add_event(
-                            name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
-                            attributes={
-                                SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
-                            },
-                        )
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                            response_dict.get('usage').get('prompt_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
-                            response_dict.get('usage').get('completion_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                            response_dict.get('usage').get('total_tokens'))
-                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
-                            cost)
+                            [response_dict.get('choices')[i].get('finish_reason')])
+                        if trace_content:
+                            span.add_event(
+                                name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                                attributes={
+                                    # pylint: disable=line-too-long
+                                    SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+                                },
+                            )
+                        if kwargs.get('tools'):
+                            span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+                                str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+                        if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                "text")
+                        elif response_dict.get('choices')[i].get('message').get('content') is not None:
+                            span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                                "json")
 
                     span.set_status(Status(StatusCode.OK))
 
                     if disable_metrics is False:
-                        attributes = {
-                            TELEMETRY_SDK_NAME:
-                                "openlit",
-                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                                application_name,
-                            SemanticConvetion.GEN_AI_SYSTEM:
-                                SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                            SemanticConvetion.GEN_AI_ENVIRONMENT:
-                                environment,
-                            SemanticConvetion.GEN_AI_TYPE:
-                                SemanticConvetion.GEN_AI_TYPE_CHAT,
-                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                                kwargs.get("model", "gpt-3.5-turbo")
-                        }
+                        attributes = create_metrics_attributes(
+                            service_name=application_name,
+                            deployment_environment=environment,
+                            operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+                            system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                            request_model=request_model,
+                            server_address=server_address,
+                            server_port=server_port,
+                            response_model=response_dict.get('model'),
+                        )
 
+                        metrics["genai_client_usage_tokens"].record(
+                            input_tokens + output_tokens, attributes
+                        )
+                        metrics["genai_client_operation_duration"].record(
+                            end_time - start_time, attributes
+                        )
+                        metrics["genai_server_ttft"].record(
+                            end_time - start_time, attributes
+                        )
                         metrics["genai_requests"].add(1, attributes)
-                        metrics["genai_total_tokens"].add(response_dict.get('usage').get('total_tokens'), attributes)
-                        metrics["genai_completion_tokens"].add(response_dict.get('usage').get('completion_tokens'), attributes)
-                        metrics["genai_prompt_tokens"].add(response_dict.get('usage').get('prompt_tokens'), attributes)
+                        metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                        metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                         metrics["genai_cost"].record(cost, attributes)
 
                     # Return original response
@@ -409,20 +476,19 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def async_embedding(gen_ai_endpoint, version, environment, application_name,
-                    tracer, pricing_info, trace_content, metrics, disable_metrics):
+def async_embedding(version, environment, application_name,
+                    tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for embeddings to collect metrics.
-
+
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
         tracer: OpenTelemetry tracer for creating spans.
        pricing_info: Information used for calculating the cost of OpenAI usage.
         trace_content: Flag indicating whether to trace the actual content.
-
+
     Returns:
         A function that wraps the embeddings method to add telemetry.
     """
@@ -444,40 +510,56 @@ def async_embedding(gen_ai_endpoint, version, environment, application_name,
             The response from the original 'embeddings' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "text-embedding-ada-002")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = await wrapped(*args, **kwargs)
+            end_time = time.time()
+
             response_dict = response_as_dict(response)
             try:
+                input_tokens = response_dict.get('usage').get('prompt_tokens')
+
                 # Calculate cost of the operation
-                cost = get_embed_model_cost(kwargs.get("model", "text-embedding-ada-002"),
-                    pricing_info, response_dict.get('usage').get('prompt_tokens'))
+                cost = get_embed_model_cost(request_model,
+                    pricing_info, input_tokens)
 
-                # Set Span attributes
+                # Set Span attributes (OTel Semconv)
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                    SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                     SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                    SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                    gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                    request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
+                    [kwargs.get('encoding_format', 'float')])
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                    request_model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                    server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                    server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+                    input_tokens)
+
+                # Set Span attributes (Extras)
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                     environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                     application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                    kwargs.get("model", "text-embedding-ada-002"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
-                    kwargs.get("encoding_format", "float"))
-                # span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
-                #                    kwargs.get("dimensions", "null"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                     kwargs.get("user", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                    response_dict.get('usage').get('prompt_tokens'))
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
-                    response_dict.get('usage').get('total_tokens'))
+                    input_tokens)
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                     cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                    version)
 
                 if trace_content:
                     span.add_event(
@@ -490,26 +572,24 @@ def async_embedding(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "text-embedding-ada-002")
-                    }
-
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
+                    metrics["genai_client_usage_tokens"].record(
+                        input_tokens, attributes
+                    )
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
-                    metrics["genai_total_tokens"].add(
-                        response_dict.get('usage').get('total_tokens'), attributes)
-                    metrics["genai_prompt_tokens"].add(
-                        response_dict.get('usage').get('prompt_tokens'), attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
                     metrics["genai_cost"].record(cost, attributes)
 
                 # Return original response
@@ -524,125 +604,19 @@ def async_embedding(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def async_finetune(gen_ai_endpoint, version, environment, application_name,
+def async_image_generate(version, environment, application_name,
                          tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
-    Generates a telemetry wrapper for fine-tuning jobs to collect metrics.
-
-    Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
-        version: Version of the monitoring package.
-        environment: Deployment environment (e.g., production, staging).
-        application_name: Name of the application using the OpenAI API.
-        tracer: OpenTelemetry tracer for creating spans.
-        pricing_info: Information used for calculating the cost of OpenAI usage.
-        trace_content: Flag indicating whether to trace the actual content.
-
-    Returns:
-        A function that wraps the fine tuning creation method to add telemetry.
-    """
-
-    async def wrapper(wrapped, instance, args, kwargs):
-        """
-        Wraps the 'fine_tuning.jobs.create' API call to add telemetry.
-
-        This collects metrics such as execution time, usage stats, and handles errors
-        gracefully, adding details to the trace for observability.
-
-        Args:
-            wrapped: The original 'fine_tuning.jobs.create' method to be wrapped.
-            instance: The instance of the class where the original method is defined.
-            args: Positional arguments for the method.
-            kwargs: Keyword arguments for the method.
-
-        Returns:
-            The response from the original 'fine_tuning.jobs.create' method.
-        """
-
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
-            response = await wrapped(*args, **kwargs)
-
-            # Handling exception ensure observability without disrupting operation
-            try:
-                # Set Span attributes
-                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
-                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
-                    SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                    SemanticConvetion.GEN_AI_TYPE_FINETUNING)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                    gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
-                    environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
-                    application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                    kwargs.get("model", "gpt-3.5-turbo"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TRAINING_FILE,
-                    kwargs.get("training_file", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_VALIDATION_FILE,
-                    kwargs.get("validation_file", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_BATCH_SIZE,
-                    kwargs.get("hyperparameters.batch_size", "auto"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_LRM,
-                    kwargs.get("hyperparameters.learning_rate_multiplier",
-                        "auto"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_EPOCHS,
-                    kwargs.get("hyperparameters.n_epochs", "auto"))
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_SUFFIX,
-                    kwargs.get("suffix", ""))
-                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
-                    response.id)
-                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
-                    response.usage.prompt_tokens)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_STATUS,
-                    response.status)
-                span.set_status(Status(StatusCode.OK))
-
-                if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_FINETUNING,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "gpt-3.5-turbo")
-                    }
-
-                    metrics["genai_requests"].add(1, attributes)
-
-                # Return original response
-                return response
-
-            except Exception as e:
-                handle_exception(span, e)
-                logger.error("Error in trace creation: %s", e)
-
-                # Return original response
-                return response
-
-    return wrapper
-
-def async_image_generate(gen_ai_endpoint, version, environment, application_name,
-                         tracer, pricing_info, trace_content, metrics, disable_metrics):
-    """
     Generates a telemetry wrapper for image generation to collect metrics.
-
+
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of OpenAI image generation.
         trace_content: Flag indicating whether to trace the input prompt and generated images.
-
+
     Returns:
         A function that wraps the image generation method to add telemetry.
     """
@@ -664,8 +638,16 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name,
             The response from the original 'images.generate' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "dall-e-2")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = await wrapped(*args, **kwargs)
+            end_time = time.time()
+
             images_count = 0
 
             try:
@@ -676,27 +658,35 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name,
                     image = "url"
 
                 # Calculate cost of the operation
-                cost = get_image_model_cost(kwargs.get("model", "dall-e-2"),
+                cost = get_image_model_cost(request_model,
                     pricing_info, kwargs.get("size", "1024x1024"),
                     kwargs.get("quality", "standard"))
 
                 for items in response.data:
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                        SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                         SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                        SemanticConvetion.GEN_AI_TYPE_IMAGE)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                        gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                        request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                        server_address)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                        server_port)
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                         response.created)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                        request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                        "image")
+
+                    # Set Span attributes (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                         environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    span.set_attribute(SERVICE_NAME,
                         application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                        kwargs.get("model", "dall-e-2"))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
                         kwargs.get("size", "1024x1024"))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
@@ -707,6 +697,9 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name,
                         items.revised_prompt if items.revised_prompt else "")
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                         kwargs.get("user", ""))
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                        version)
+
                     if trace_content:
                         span.add_event(
                             name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -714,7 +707,7 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name,
                                 SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
                             },
                         )
-                        attribute_name = f"gen_ai.response.image.{images_count}"
+                        attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
                         span.add_event(
                             name=attribute_name,
                             attributes={
@@ -729,21 +722,20 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_IMAGE,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "dall-e-2")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
 
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
                     metrics["genai_cost"].record(cost, attributes)
 
@@ -759,20 +751,19 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def async_image_variatons(gen_ai_endpoint, version, environment, application_name,
-                          tracer, pricing_info, trace_content, metrics, disable_metrics):
+def async_image_variatons(version, environment, application_name,
+                          tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for creating image variations to collect metrics.
-
+
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
         version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of generating image variations.
         trace_content: Flag indicating whether to trace the input image and generated variations.
-
+
     Returns:
         A function that wraps the image variations creation method to add telemetry.
     """
@@ -794,8 +785,16 @@ def async_image_variatons(gen_ai_endpoint, version, environment, application_name,
             The response from the original 'images.create.variations' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "dall-e-2")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = await wrapped(*args, **kwargs)
+            end_time = time.time()
+
             images_count = 0
 
             try:
@@ -806,34 +805,45 @@ def async_image_variatons(gen_ai_endpoint, version, environment, application_name,
                     image = "url"
 
                 # Calculate cost of the operation
-                cost = get_image_model_cost(kwargs.get("model", "dall-e-2"), pricing_info,
+                cost = get_image_model_cost(request_model, pricing_info,
                     kwargs.get("size", "1024x1024"), "standard")
 
                 for items in response.data:
-                    # Set Span attributes
+                    # Set Span attributes (OTel Semconv)
                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                    span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                        SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                         SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                        SemanticConvetion.GEN_AI_TYPE_IMAGE)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                        gen_ai_endpoint)
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                        request_model)
+                    span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                        server_address)
+                    span.set_attribute(SemanticConvetion.SERVER_PORT,
+                        server_port)
                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                         response.created)
-                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                        request_model)
+                    span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                        "image")
+
+                    # Set Span attributes (Extras)
+                    span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                         environment)
-                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                    span.set_attribute(SERVICE_NAME,
                         application_name)
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                        kwargs.get("model", "dall-e-2"))
-                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
-                        kwargs.get("user", ""))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
                         kwargs.get("size", "1024x1024"))
                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
                         "standard")
+                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                        kwargs.get("user", ""))
+                    span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                        version)
+
                     if trace_content:
-                        attribute_name = f"gen_ai.response.image.{images_count}"
+                        attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
                         span.add_event(
                             name=attribute_name,
                             attributes={
@@ -848,21 +858,20 @@ def async_image_variatons(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_IMAGE,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "dall-e-2")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
 
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
                     metrics["genai_cost"].record(cost, attributes)
 
@@ -878,20 +887,19 @@ def async_image_variatons(gen_ai_endpoint, version, environment, application_name,
 
     return wrapper
 
-def async_audio_create(gen_ai_endpoint, version, environment, application_name,
-                       tracer, pricing_info, trace_content, metrics, disable_metrics):
+def async_audio_create(version, environment, application_name,
+                       tracer, pricing_info, trace_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for creating speech audio to collect metrics.
-
+
     Args:
-        gen_ai_endpoint: Endpoint identifier for logging and tracing.
        version: Version of the monitoring package.
         environment: Deployment environment (e.g., production, staging).
         application_name: Name of the application using the OpenAI API.
         tracer: OpenTelemetry tracer for creating spans.
         pricing_info: Information used for calculating the cost of generating speech audio.
         trace_content: Flag indicating whether to trace the input text and generated audio.
-
+
     Returns:
         A function that wraps the speech audio creation method to add telemetry.
     """
@@ -913,28 +921,42 @@ def async_audio_create(gen_ai_endpoint, version, environment, application_name,
             The response from the original 'audio.speech.create' method.
         """
 
-        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+        server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+        request_model = kwargs.get("model", "tts-1")
+
+        span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO} {request_model}"
+
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+            start_time = time.time()
             response = await wrapped(*args, **kwargs)
+            end_time = time.time()
 
             try:
                 # Calculate cost of the operation
-                cost = get_audio_model_cost(kwargs.get("model", "tts-1"),
+                cost = get_audio_model_cost(request_model,
                     pricing_info, kwargs.get("input", ""))
 
                 # Set Span attributes
                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+                    SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO)
                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                     SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
-                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
-                    SemanticConvetion.GEN_AI_TYPE_AUDIO)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
-                    gen_ai_endpoint)
-                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                    request_model)
+                span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+                    server_address)
+                span.set_attribute(SemanticConvetion.SERVER_PORT,
+                    server_port)
+                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+                    request_model)
+                span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+                    "speech")
+
+                span.set_attribute(DEPLOYMENT_ENVIRONMENT,
                     environment)
-                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                span.set_attribute(SERVICE_NAME,
                     application_name)
-                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
-                    kwargs.get("model", "tts-1"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_AUDIO_VOICE,
                     kwargs.get("voice", "alloy"))
                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_AUDIO_RESPONSE_FORMAT,
@@ -943,6 +965,8 @@ def async_audio_create(gen_ai_endpoint, version, environment, application_name,
                     kwargs.get("speed", 1))
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                     cost)
+                span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+                    version)
                 if trace_content:
                     span.add_event(
                         name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -954,21 +978,20 @@ def async_audio_create(gen_ai_endpoint, version, environment, application_name,
                 span.set_status(Status(StatusCode.OK))
 
                 if disable_metrics is False:
-                    attributes = {
-                        TELEMETRY_SDK_NAME:
-                            "openlit",
-                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
-                            application_name,
-                        SemanticConvetion.GEN_AI_SYSTEM:
-                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
-                        SemanticConvetion.GEN_AI_ENVIRONMENT:
-                            environment,
-                        SemanticConvetion.GEN_AI_TYPE:
-                            SemanticConvetion.GEN_AI_TYPE_AUDIO,
-                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
-                            kwargs.get("model", "tts-1")
-                    }
+                    attributes = create_metrics_attributes(
+                        service_name=application_name,
+                        deployment_environment=environment,
+                        operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO,
+                        system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+                        request_model=request_model,
+                        server_address=server_address,
+                        server_port=server_port,
+                        response_model=request_model,
+                    )
 
+                    metrics["genai_client_operation_duration"].record(
+                        end_time - start_time, attributes
+                    )
                     metrics["genai_requests"].add(1, attributes)
                     metrics["genai_cost"].record(cost, attributes)
 
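Across all of these wrappers, 1.33.10 records into four new histogram instruments (`genai_client_usage_tokens`, `genai_client_operation_duration`, `genai_server_tbt`, `genai_server_ttft`) via `.record()`, keeps the existing counters, and drops `genai_total_tokens`. The instruments themselves are created in openlit/otel/metrics.py (the +98/-7 entry in the file list). A hedged sketch of what that registration could look like with the OpenTelemetry metrics API — the dict keys match the wrappers above, but the instrument names, units, and descriptions here are assumptions:

```python
from opentelemetry import metrics

meter = metrics.get_meter(__name__, version="1.33.10")

# Maps to the `metrics[...]` dict the wrappers index into
instruments = {
    "genai_client_usage_tokens": meter.create_histogram(
        name="gen_ai.client.token.usage",
        description="Number of input and output tokens processed",
        unit="{token}",
    ),
    "genai_client_operation_duration": meter.create_histogram(
        name="gen_ai.client.operation.duration",
        description="GenAI operation duration",
        unit="s",
    ),
    "genai_server_tbt": meter.create_histogram(
        name="gen_ai.server.time_per_output_token",
        description="Time between output chunks",
        unit="s",
    ),
    "genai_server_ttft": meter.create_histogram(
        name="gen_ai.server.time_to_first_token",
        description="Time to first token",
        unit="s",
    ),
    "genai_requests": meter.create_counter(
        name="gen_ai.total.requests",
        description="Number of GenAI requests",
        unit="{request}",
    ),
}
```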