openlit 1.33.7__py3-none-any.whl → 1.33.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. openlit/__helpers.py +83 -0
  2. openlit/__init__.py +1 -1
  3. openlit/instrumentation/ag2/ag2.py +2 -2
  4. openlit/instrumentation/ai21/__init__.py +4 -4
  5. openlit/instrumentation/ai21/ai21.py +370 -319
  6. openlit/instrumentation/ai21/async_ai21.py +371 -319
  7. openlit/instrumentation/anthropic/__init__.py +4 -4
  8. openlit/instrumentation/anthropic/anthropic.py +321 -189
  9. openlit/instrumentation/anthropic/async_anthropic.py +323 -190
  10. openlit/instrumentation/assemblyai/__init__.py +1 -1
  11. openlit/instrumentation/assemblyai/assemblyai.py +59 -43
  12. openlit/instrumentation/astra/astra.py +4 -4
  13. openlit/instrumentation/astra/async_astra.py +4 -4
  14. openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
  15. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
  16. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
  17. openlit/instrumentation/bedrock/__init__.py +1 -1
  18. openlit/instrumentation/bedrock/bedrock.py +115 -58
  19. openlit/instrumentation/chroma/chroma.py +4 -4
  20. openlit/instrumentation/cohere/__init__.py +33 -10
  21. openlit/instrumentation/cohere/async_cohere.py +610 -0
  22. openlit/instrumentation/cohere/cohere.py +410 -219
  23. openlit/instrumentation/controlflow/controlflow.py +2 -2
  24. openlit/instrumentation/crawl4ai/async_crawl4ai.py +2 -2
  25. openlit/instrumentation/crawl4ai/crawl4ai.py +2 -2
  26. openlit/instrumentation/crewai/crewai.py +2 -2
  27. openlit/instrumentation/dynamiq/dynamiq.py +2 -2
  28. openlit/instrumentation/elevenlabs/async_elevenlabs.py +73 -47
  29. openlit/instrumentation/elevenlabs/elevenlabs.py +73 -52
  30. openlit/instrumentation/embedchain/embedchain.py +4 -4
  31. openlit/instrumentation/firecrawl/firecrawl.py +2 -2
  32. openlit/instrumentation/google_ai_studio/__init__.py +9 -9
  33. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
  34. openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
  35. openlit/instrumentation/gpt4all/gpt4all.py +17 -17
  36. openlit/instrumentation/groq/async_groq.py +14 -14
  37. openlit/instrumentation/groq/groq.py +14 -14
  38. openlit/instrumentation/haystack/haystack.py +2 -2
  39. openlit/instrumentation/julep/async_julep.py +2 -2
  40. openlit/instrumentation/julep/julep.py +2 -2
  41. openlit/instrumentation/langchain/langchain.py +36 -31
  42. openlit/instrumentation/letta/letta.py +6 -6
  43. openlit/instrumentation/litellm/async_litellm.py +20 -20
  44. openlit/instrumentation/litellm/litellm.py +20 -20
  45. openlit/instrumentation/llamaindex/llamaindex.py +2 -2
  46. openlit/instrumentation/mem0/mem0.py +2 -2
  47. openlit/instrumentation/milvus/milvus.py +4 -4
  48. openlit/instrumentation/mistral/async_mistral.py +18 -18
  49. openlit/instrumentation/mistral/mistral.py +18 -18
  50. openlit/instrumentation/multion/async_multion.py +2 -2
  51. openlit/instrumentation/multion/multion.py +2 -2
  52. openlit/instrumentation/ollama/async_ollama.py +29 -29
  53. openlit/instrumentation/ollama/ollama.py +29 -29
  54. openlit/instrumentation/openai/__init__.py +11 -230
  55. openlit/instrumentation/openai/async_openai.py +434 -409
  56. openlit/instrumentation/openai/openai.py +415 -393
  57. openlit/instrumentation/phidata/phidata.py +2 -2
  58. openlit/instrumentation/pinecone/pinecone.py +4 -4
  59. openlit/instrumentation/premai/premai.py +20 -20
  60. openlit/instrumentation/qdrant/async_qdrant.py +4 -4
  61. openlit/instrumentation/qdrant/qdrant.py +4 -4
  62. openlit/instrumentation/reka/async_reka.py +6 -6
  63. openlit/instrumentation/reka/reka.py +6 -6
  64. openlit/instrumentation/together/async_together.py +18 -18
  65. openlit/instrumentation/together/together.py +18 -18
  66. openlit/instrumentation/transformers/transformers.py +6 -6
  67. openlit/instrumentation/vertexai/async_vertexai.py +53 -53
  68. openlit/instrumentation/vertexai/vertexai.py +53 -53
  69. openlit/instrumentation/vllm/vllm.py +6 -6
  70. openlit/otel/metrics.py +98 -7
  71. openlit/semcov/__init__.py +113 -80
  72. {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/METADATA +2 -1
  73. openlit-1.33.9.dist-info/RECORD +121 -0
  74. {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/WHEEL +1 -1
  75. openlit/instrumentation/openai/async_azure_openai.py +0 -900
  76. openlit/instrumentation/openai/azure_openai.py +0 -898
  77. openlit-1.33.7.dist-info/RECORD +0 -122
  78. {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/LICENSE +0 -0
@@ -1,11 +1,11 @@
- # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches
  """
  Module for monitoring OpenAI API calls.
  """

  import logging
+ import time
  from opentelemetry.trace import SpanKind, Status, StatusCode
- from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+ from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
  from openlit.__helpers import (
  get_chat_model_cost,
  get_embed_model_cost,
@@ -14,19 +14,22 @@ from openlit.__helpers import (
  openai_tokens,
  handle_exception,
  response_as_dict,
+ calculate_ttft,
+ calculate_tbt,
+ create_metrics_attributes,
+ set_server_address_and_port
  )
  from openlit.semcov import SemanticConvetion

  # Initialize logger for logging potential issues and operations
  logger = logging.getLogger(__name__)

- def async_chat_completions(gen_ai_endpoint, version, environment, application_name,
+ def async_chat_completions(version, environment, application_name,
  tracer, pricing_info, trace_content, metrics, disable_metrics):
  """
  Generates a telemetry wrapper for chat completions to collect metrics.

  Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
  version: Version of the monitoring package.
  environment: Deployment environment (e.g., production, staging).
  application_name: Name of the application using the OpenAI API.
@@ -54,6 +57,8 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
  wrapped,
  span,
  kwargs,
+ server_address,
+ server_port,
  **args,
  ):
  self.__wrapped__ = wrapped
@@ -61,9 +66,20 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
  # Placeholder for aggregating streaming response
  self._llmresponse = ""
  self._response_id = ""
+ self._response_model = ""
+ self._finish_reason = ""
+ self._openai_response_service_tier = ""
+ self._openai_system_fingerprint = ""

  self._args = args
  self._kwargs = kwargs
+ self._start_time = time.time()
+ self._end_time = None
+ self._timestamps = []
+ self._ttft = 0
+ self._tbt = 0
+ self._server_address = server_address
+ self._server_port = server_port

  async def __aenter__(self):
  await self.__wrapped__.__aenter__()
@@ -82,6 +98,14 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
  async def __anext__(self):
  try:
  chunk = await self.__wrapped__.__anext__()
+ end_time = time.time()
+ # Record the timestamp for the current chunk
+ self._timestamps.append(end_time)
+
+ if len(self._timestamps) == 1:
+ # Calculate time to first chunk
+ self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
  chunked = response_as_dict(chunk)
  # Collect message IDs and aggregated response from events
  if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
@@ -91,10 +115,18 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
  if content:
  self._llmresponse += content
  self._response_id = chunked.get('id')
+ self._response_model = chunked.get('model')
+ self._finish_reason = chunked.get('choices')[0].get('finish_reason')
+ self._openai_response_service_tier = chunked.get('service_tier')
+ self._openai_system_fingerprint = chunked.get('system_fingerprint')
  return chunk
  except StopAsyncIteration:
  # Handling exception ensure observability without disrupting operation
  try:
+ self._end_time = time.time()
+ if len(self._timestamps) > 1:
+ self._tbt = calculate_tbt(self._timestamps)
+
  # Format 'messages' into a single string
  message_prompt = self._kwargs.get("messages", "")
  formatted_messages = []
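
The streaming wrapper above relies on two timing helpers that this release adds to openlit/__helpers.py (+83 lines). Their bodies are not part of this diff; a minimal sketch consistent with the call sites calculate_ttft(self._timestamps, self._start_time) and calculate_tbt(self._timestamps) could look like the following, though the shipped implementations may differ:

# Hypothetical sketch of the timing helpers, inferred from their call sites.
def calculate_ttft(timestamps, start_time):
    """Time to first token: delay between request start and the first chunk."""
    if timestamps:
        return timestamps[0] - start_time
    return 0

def calculate_tbt(timestamps):
    """Mean time between tokens, taken over consecutive chunk arrivals."""
    if len(timestamps) > 1:
        gaps = [t2 - t1 for t1, t2 in zip(timestamps, timestamps[1:])]
        return sum(gaps) / len(gaps)
    return 0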
@@ -103,68 +135,100 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
  content = message["content"]

  if isinstance(content, list):
- content_str = ", ".join(
- # pylint: disable=line-too-long
- f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
- if "type" in item else f'text: {item["text"]}'
- for item in content
- )
+ content_str_list = []
+ for item in content:
+ if item["type"] == "text":
+ content_str_list.append(f'text: {item["text"]}')
+ elif (item["type"] == "image_url" and
+ not item["image_url"]["url"].startswith("data:")):
+ content_str_list.append(f'image_url: {item["image_url"]["url"]}')
+ content_str = ", ".join(content_str_list)
  formatted_messages.append(f"{role}: {content_str}")
  else:
  formatted_messages.append(f"{role}: {content}")
  prompt = "\n".join(formatted_messages)

+ request_model = self._kwargs.get("model", "gpt-4o")
+
  # Calculate tokens using input prompt and aggregated response
- prompt_tokens = openai_tokens(prompt,
- self._kwargs.get("model", "gpt-3.5-turbo"))
- completion_tokens = openai_tokens(self._llmresponse,
- self._kwargs.get("model", "gpt-3.5-turbo"))
+ input_tokens = openai_tokens(prompt,
+ request_model)
+ output_tokens = openai_tokens(self._llmresponse,
+ request_model)

  # Calculate cost of the operation
- cost = get_chat_model_cost(self._kwargs.get("model", "gpt-3.5-turbo"),
- pricing_info, prompt_tokens,
- completion_tokens)
+ cost = get_chat_model_cost(request_model,
+ pricing_info, input_tokens,
+ output_tokens)

- # Set Span attributes
+ # Set Span attributes (OTel Semconv)
  self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+ SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
  self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
  SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
- self._span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
- SemanticConvetion.GEN_AI_TYPE_CHAT)
- self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+ request_model)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+ self._kwargs.get("seed", ""))
+ self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+ self._server_port)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+ self._kwargs.get("frequency_penalty", 0.0))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+ self._kwargs.get("max_tokens", -1))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+ self._kwargs.get("presence_penalty", 0.0))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+ self._kwargs.get("stop", []))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+ self._kwargs.get("temperature", 1.0))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+ self._kwargs.get("top_p", 1.0))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+ [self._finish_reason])
  self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
  self._response_id)
- self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+ self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ self._response_model)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+ input_tokens)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+ output_tokens)
+ self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ self._server_address)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_REQUEST_SERVICE_TIER,
+ self._kwargs.get("service_tier", "auto"))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER,
+ self._openai_response_service_tier)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_RESPONSE_SYSTEM_FINGERPRINT,
+ self._openai_system_fingerprint)
+ if isinstance(self._llmresponse, str):
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ "text")
+ else:
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ "json")
+
+ # Set Span attributes (Extra)
+ self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
  environment)
- self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+ self._span.set_attribute(SERVICE_NAME,
  application_name)
- self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- self._kwargs.get("model", "gpt-3.5-turbo"))
  self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
  self._kwargs.get("user", ""))
- self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
- self._kwargs.get("top_p", 1.0))
- self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
- self._kwargs.get("max_tokens", -1))
- self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
- self._kwargs.get("temperature", 1.0))
- self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
- self._kwargs.get("presence_penalty", 0.0))
- self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
- self._kwargs.get("frequency_penalty", 0.0))
- self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
- self._kwargs.get("seed", ""))
  self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
  True)
- self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
- prompt_tokens)
- self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
- completion_tokens)
  self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- prompt_tokens + completion_tokens)
+ input_tokens + output_tokens)
  self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
  cost)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+ self._tbt)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+ self._ttft)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)
  if trace_content:
  self._span.add_event(
  name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -178,31 +242,35 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
  SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
  },
  )
-
  self._span.set_status(Status(StatusCode.OK))

  if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_TYPE:
- SemanticConvetion.GEN_AI_TYPE_CHAT,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- self._kwargs.get("model", "gpt-3.5-turbo")
- }
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+ system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+ request_model=request_model,
+ server_address=self._server_address,
+ server_port=self._server_port,
+ response_model=self._response_model,
+ )

- metrics["genai_requests"].add(1, attributes)
- metrics["genai_total_tokens"].add(
- prompt_tokens + completion_tokens, attributes
+ metrics["genai_client_usage_tokens"].record(
+ input_tokens + output_tokens, attributes
+ )
+ metrics["genai_client_operation_duration"].record(
+ self._end_time - self._start_time, attributes
  )
- metrics["genai_completion_tokens"].add(completion_tokens, attributes)
- metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+ metrics["genai_server_tbt"].record(
+ self._tbt, attributes
+ )
+ metrics["genai_server_ttft"].record(
+ self._ttft, attributes
+ )
+ metrics["genai_requests"].add(1, attributes)
+ metrics["genai_completion_tokens"].add(output_tokens, attributes)
+ metrics["genai_prompt_tokens"].add(input_tokens, attributes)
  metrics["genai_cost"].record(cost, attributes)

  except Exception as e:
@@ -231,20 +299,25 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na

  # Check if streaming is enabled for the API call
  streaming = kwargs.get("stream", False)
+ server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+ request_model = kwargs.get("model", "gpt-4o")
+
+ span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

  # pylint: disable=no-else-return
  if streaming:
  # Special handling for streaming response to accommodate the nature of data flow
  awaited_wrapped = await wrapped(*args, **kwargs)
- span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
+ span = tracer.start_span(span_name, kind=SpanKind.CLIENT)

- return TracedAsyncStream(awaited_wrapped, span, kwargs)
+ return TracedAsyncStream(awaited_wrapped, span, kwargs, server_address, server_port)

  # Handling for non-streaming responses
  else:
- # pylint: disable=line-too-long
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+ with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+ start_time = time.time()
  response = await wrapped(*args, **kwargs)
+ end_time = time.time()

  response_dict = response_as_dict(response)
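
Every wrapper in this module now resolves the upstream host and port from the client instance via set_server_address_and_port (new in openlit/__helpers.py) and names spans "{operation} {model}" (for example "chat gpt-4o") instead of a fixed endpoint label. The helper's body is not shown in this diff; a hypothetical sketch, assuming the OpenAI client exposes a base_url, might be:

# Hypothetical sketch of set_server_address_and_port; the shipped helper
# in openlit/__helpers.py may inspect the client object differently.
from urllib.parse import urlparse

def set_server_address_and_port(instance, default_address, default_port):
    base_client = getattr(instance, "_client", None) or instance
    base_url = getattr(base_client, "base_url", None)
    if not base_url:
        return default_address, default_port
    parsed = urlparse(str(base_url))
    return parsed.hostname or default_address, parsed.port or default_port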

@@ -258,7 +331,6 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na

  if isinstance(content, list):
  content_str = ", ".join(
- # pylint: disable=line-too-long
  f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
  if "type" in item else f'text: {item["text"]}'
  for item in content
@@ -268,38 +340,72 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
  formatted_messages.append(f"{role}: {content}")
  prompt = "\n".join(formatted_messages)

- # Set base span attribues
+ input_tokens = response_dict.get('usage').get('prompt_tokens')
+ output_tokens = response_dict.get('usage').get('completion_tokens')
+
+ # Calculate cost of the operation
+ cost = get_chat_model_cost(request_model,
+ pricing_info, input_tokens,
+ output_tokens)
+
+ # Set base span attribues (OTel Semconv)
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+ span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+ SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
  span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
  SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
- SemanticConvetion.GEN_AI_TYPE_CHAT)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+ kwargs.get("seed", ""))
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
+ server_port)
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+ kwargs.get("frequency_penalty", 0.0))
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+ kwargs.get("max_tokens", -1))
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+ kwargs.get("presence_penalty", 0.0))
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+ kwargs.get("stop", []))
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+ kwargs.get("temperature", 1.0))
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+ kwargs.get("top_p", 1.0))
  span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
  response_dict.get("id"))
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ response_dict.get('model'))
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+ input_tokens)
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+ output_tokens)
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ server_address)
+ span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_REQUEST_SERVICE_TIER,
+ kwargs.get("service_tier", "auto"))
+ span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER,
+ response_dict.get('service_tier'))
+ span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_RESPONSE_SYSTEM_FINGERPRINT,
+ response_dict.get('system_fingerprint'))
+
+ # Set base span attribues (Extras)
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
  environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+ span.set_attribute(SERVICE_NAME,
  application_name)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get("model", "gpt-3.5-turbo"))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
- kwargs.get("top_p", 1.0))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
- kwargs.get("max_tokens", -1))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
  kwargs.get("user", ""))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
- kwargs.get("temperature", 1.0))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
- kwargs.get("presence_penalty", 0.0))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
- kwargs.get("frequency_penalty", 0.0))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
- kwargs.get("seed", ""))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
  False)
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+ input_tokens + output_tokens)
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+ cost)
+ span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+ end_time - start_time)
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)
  if trace_content:
  span.add_event(
  name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -308,93 +414,54 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na
  },
  )

- # Set span attributes when tools is not passed to the function call
- if "tools" not in kwargs:
- # Calculate cost of the operation
- cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
- pricing_info, response_dict.get('usage', {}).get('prompt_tokens', None),
- response_dict.get('usage', {}).get('completion_tokens', None))
-
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
- response_dict.get('usage', {}).get('prompt_tokens', None))
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
- response_dict.get('usage', {}).get('completion_tokens', None))
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- response_dict.get('usage', {}).get('total_tokens', None))
+ for i in range(kwargs.get('n',1)):
  span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
- [response_dict.get('choices', [])[0].get('finish_reason', None)])
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
- cost)
-
- # Set span attributes for when n = 1 (default)
- if "n" not in kwargs or kwargs["n"] == 1:
- if trace_content:
- span.add_event(
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
- attributes={
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices', [])[0].get("message").get("content"),
- },
- )
-
- # Set span attributes for when n > 0
- else:
- i = 0
- while i < kwargs["n"] and trace_content is True:
- attribute_name = f"gen_ai.content.completion.{i}"
- span.add_event(
- name=attribute_name,
- attributes={
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
- },
- )
- i += 1
-
- # Return original response
- return response
-
- # Set span attributes when tools is passed to the function call
- elif "tools" in kwargs:
- # Calculate cost of the operation
- cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
- pricing_info, response_dict.get('usage').get('prompt_tokens'),
- response_dict.get('usage').get('completion_tokens'))
- span.add_event(
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
- attributes={
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
- },
- )
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
- response_dict.get('usage').get('prompt_tokens'))
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
- response_dict.get('usage').get('completion_tokens'))
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- response_dict.get('usage').get('total_tokens'))
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
- cost)
+ [response_dict.get('choices')[i].get('finish_reason')])
+ if trace_content:
+ span.add_event(
+ name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+ attributes={
+ # pylint: disable=line-too-long
+ SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+ },
+ )
+ if kwargs.get('tools'):
+ span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+ str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+ if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ "text")
+ elif response_dict.get('choices')[i].get('message').get('content') is not None:
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ "json")

  span.set_status(Status(StatusCode.OK))

  if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_TYPE:
- SemanticConvetion.GEN_AI_TYPE_CHAT,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get("model", "gpt-3.5-turbo")
- }
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+ system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+ request_model=request_model,
+ server_address=server_address,
+ server_port=server_port,
+ response_model=response_dict.get('model'),
+ )

+ metrics["genai_client_usage_tokens"].record(
+ input_tokens + output_tokens, attributes
+ )
+ metrics["genai_client_operation_duration"].record(
+ end_time - start_time, attributes
+ )
+ metrics["genai_server_ttft"].record(
+ end_time - start_time, attributes
+ )
  metrics["genai_requests"].add(1, attributes)
- metrics["genai_total_tokens"].add(response_dict.get('usage').get('total_tokens'), attributes)
- metrics["genai_completion_tokens"].add(response_dict.get('usage').get('completion_tokens'), attributes)
- metrics["genai_prompt_tokens"].add(response_dict.get('usage').get('prompt_tokens'), attributes)
+ metrics["genai_completion_tokens"].add(output_tokens, attributes)
+ metrics["genai_prompt_tokens"].add(input_tokens, attributes)
  metrics["genai_cost"].record(cost, attributes)

  # Return original response
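
All of the hand-assembled metric attribute dictionaries in this file are replaced by a single create_metrics_attributes helper, also new in openlit/__helpers.py. Its body is not in this hunk; a sketch consistent with the keyword arguments used above, with attribute keys assumed from the OTel semantic conventions referenced elsewhere in the diff, could be:

# Sketch of create_metrics_attributes; the attribute keys in the released
# helper may differ from these assumed semconv names.
from opentelemetry.sdk.resources import (
    SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT)

def create_metrics_attributes(service_name, deployment_environment, operation,
                              system, request_model, server_address,
                              server_port, response_model):
    return {
        TELEMETRY_SDK_NAME: "openlit",
        SERVICE_NAME: service_name,
        DEPLOYMENT_ENVIRONMENT: deployment_environment,
        "gen_ai.operation.name": operation,
        "gen_ai.system": system,
        "gen_ai.request.model": request_model,
        "server.address": server_address,
        "server.port": server_port,
        "gen_ai.response.model": response_model,
    }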
@@ -409,20 +476,19 @@ def async_chat_completions(gen_ai_endpoint, version, environment, application_na

  return wrapper

- def async_embedding(gen_ai_endpoint, version, environment, application_name,
- tracer, pricing_info, trace_content, metrics, disable_metrics):
+ def async_embedding(version, environment, application_name,
+ tracer, pricing_info, trace_content, metrics, disable_metrics):
  """
  Generates a telemetry wrapper for embeddings to collect metrics.
-
+
  Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
  version: Version of the monitoring package.
  environment: Deployment environment (e.g., production, staging).
  application_name: Name of the application using the OpenAI API.
  tracer: OpenTelemetry tracer for creating spans.
  pricing_info: Information used for calculating the cost of OpenAI usage.
  trace_content: Flag indicating whether to trace the actual content.
-
+
  Returns:
  A function that wraps the embeddings method to add telemetry.
  """
@@ -444,40 +510,56 @@ def async_embedding(gen_ai_endpoint, version, environment, application_name,
  The response from the original 'embeddings' method.
  """

- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+ server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+ request_model = kwargs.get("model", "text-embedding-ada-002")
+
+ span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+
+ with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+ start_time = time.time()
  response = await wrapped(*args, **kwargs)
+ end_time = time.time()
+
  response_dict = response_as_dict(response)
  try:
+ input_tokens = response_dict.get('usage').get('prompt_tokens')
+
  # Calculate cost of the operation
- cost = get_embed_model_cost(kwargs.get("model", "text-embedding-ada-002"),
- pricing_info, response_dict.get('usage').get('prompt_tokens'))
+ cost = get_embed_model_cost(request_model,
+ pricing_info, input_tokens)

- # Set Span attributes
+ # Set Span attributes (OTel Semconv)
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+ span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+ SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
  span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
  SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
- SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
+ [kwargs.get('encoding_format', 'float')])
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ server_address)
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
+ server_port)
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+ input_tokens)
+
+ # Set Span attributes (Extras)
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
  environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+ span.set_attribute(SERVICE_NAME,
  application_name)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get("model", "text-embedding-ada-002"))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
- kwargs.get("encoding_format", "float"))
- # span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
- # kwargs.get("dimensions", "null"))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
  kwargs.get("user", ""))
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
- response_dict.get('usage').get('prompt_tokens'))
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- response_dict.get('usage').get('total_tokens'))
+ input_tokens)
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
  cost)
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)

  if trace_content:
  span.add_event(
@@ -490,26 +572,24 @@ def async_embedding(gen_ai_endpoint, version, environment, application_name,
  span.set_status(Status(StatusCode.OK))

  if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_TYPE:
- SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get("model", "text-embedding-ada-002")
- }
-
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+ system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+ request_model=request_model,
+ server_address=server_address,
+ server_port=server_port,
+ response_model=request_model,
+ )
+ metrics["genai_client_usage_tokens"].record(
+ input_tokens, attributes
+ )
+ metrics["genai_client_operation_duration"].record(
+ end_time - start_time, attributes
+ )
  metrics["genai_requests"].add(1, attributes)
- metrics["genai_total_tokens"].add(
- response_dict.get('usage').get('total_tokens'), attributes)
- metrics["genai_prompt_tokens"].add(
- response_dict.get('usage').get('prompt_tokens'), attributes)
+ metrics["genai_prompt_tokens"].add(input_tokens, attributes)
  metrics["genai_cost"].record(cost, attributes)

  # Return original response
@@ -524,125 +604,19 @@ def async_embedding(gen_ai_endpoint, version, environment, application_name,

  return wrapper

- def async_finetune(gen_ai_endpoint, version, environment, application_name,
+ def async_image_generate(version, environment, application_name,
  tracer, pricing_info, trace_content, metrics, disable_metrics):
  """
- Generates a telemetry wrapper for fine-tuning jobs to collect metrics.
-
- Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
- version: Version of the monitoring package.
- environment: Deployment environment (e.g., production, staging).
- application_name: Name of the application using the OpenAI API.
- tracer: OpenTelemetry tracer for creating spans.
- pricing_info: Information used for calculating the cost of OpenAI usage.
- trace_content: Flag indicating whether to trace the actual content.
-
- Returns:
- A function that wraps the fine tuning creation method to add telemetry.
- """
-
- async def wrapper(wrapped, instance, args, kwargs):
- """
- Wraps the 'fine_tuning.jobs.create' API call to add telemetry.
-
- This collects metrics such as execution time, usage stats, and handles errors
- gracefully, adding details to the trace for observability.
-
- Args:
- wrapped: The original 'fine_tuning.jobs.create' method to be wrapped.
- instance: The instance of the class where the original method is defined.
- args: Positional arguments for the method.
- kwargs: Keyword arguments for the method.
-
- Returns:
- The response from the original 'fine_tuning.jobs.create' method.
- """
-
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
- response = await wrapped(*args, **kwargs)
-
- # Handling exception ensure observability without disrupting operation
- try:
- # Set Span attributes
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
- SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
- SemanticConvetion.GEN_AI_TYPE_FINETUNING)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
- environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
- application_name)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get("model", "gpt-3.5-turbo"))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TRAINING_FILE,
- kwargs.get("training_file", ""))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_VALIDATION_FILE,
- kwargs.get("validation_file", ""))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_BATCH_SIZE,
- kwargs.get("hyperparameters.batch_size", "auto"))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_LRM,
- kwargs.get("hyperparameters.learning_rate_multiplier",
- "auto"))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_EPOCHS,
- kwargs.get("hyperparameters.n_epochs", "auto"))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_SUFFIX,
- kwargs.get("suffix", ""))
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
- response.id)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
- response.usage.prompt_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_STATUS,
- response.status)
- span.set_status(Status(StatusCode.OK))
-
- if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_TYPE:
- SemanticConvetion.GEN_AI_TYPE_FINETUNING,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get("model", "gpt-3.5-turbo")
- }
-
- metrics["genai_requests"].add(1, attributes)
-
- # Return original response
- return response
-
- except Exception as e:
- handle_exception(span, e)
- logger.error("Error in trace creation: %s", e)
-
- # Return original response
- return response
-
- return wrapper
-
- def async_image_generate(gen_ai_endpoint, version, environment, application_name,
- tracer, pricing_info, trace_content, metrics, disable_metrics):
- """
  Generates a telemetry wrapper for image generation to collect metrics.
-
+
  Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
  version: Version of the monitoring package.
  environment: Deployment environment (e.g., production, staging).
  application_name: Name of the application using the OpenAI API.
  tracer: OpenTelemetry tracer for creating spans.
  pricing_info: Information used for calculating the cost of OpenAI image generation.
  trace_content: Flag indicating whether to trace the input prompt and generated images.
-
+
  Returns:
  A function that wraps the image generation method to add telemetry.
  """
@@ -664,8 +638,16 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name
  The response from the original 'images.generate' method.
  """

- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+ server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+ request_model = kwargs.get("model", "dall-e-2")
+
+ span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+
+ with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+ start_time = time.time()
  response = await wrapped(*args, **kwargs)
+ end_time = time.time()
+
  images_count = 0

  try:
@@ -675,28 +657,38 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name
  else:
  image = "url"

+ request_model = kwargs.get("model", "dall-e-2")
+
  # Calculate cost of the operation
- cost = get_image_model_cost(kwargs.get("model", "dall-e-2"),
+ cost = get_image_model_cost(request_model,
  pricing_info, kwargs.get("size", "1024x1024"),
  kwargs.get("quality", "standard"))

  for items in response.data:
- # Set Span attributes
+ # Set Span attributes (OTel Semconv)
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+ span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+ SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
  span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
  SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
- SemanticConvetion.GEN_AI_TYPE_IMAGE)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ server_address)
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
+ server_port)
  span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
  response.created)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ "image")
+
+ # Set Span attributes (Extras)
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
  environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+ span.set_attribute(SERVICE_NAME,
  application_name)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get("model", "dall-e-2"))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
  kwargs.get("size", "1024x1024"))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
@@ -707,6 +699,9 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name
  items.revised_prompt if items.revised_prompt else "")
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
  kwargs.get("user", ""))
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)
+
  if trace_content:
  span.add_event(
  name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -714,7 +709,7 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name
  SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
  },
  )
- attribute_name = f"gen_ai.response.image.{images_count}"
+ attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
  span.add_event(
  name=attribute_name,
  attributes={
@@ -729,21 +724,20 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name
  span.set_status(Status(StatusCode.OK))

  if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_TYPE:
- SemanticConvetion.GEN_AI_TYPE_IMAGE,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get("model", "dall-e-2")
- }
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
+ system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+ request_model=request_model,
+ server_address=server_address,
+ server_port=server_port,
+ response_model=request_model,
+ )

+ metrics["genai_client_operation_duration"].record(
+ end_time - start_time, attributes
+ )
  metrics["genai_requests"].add(1, attributes)
  metrics["genai_cost"].record(cost, attributes)

@@ -759,20 +753,19 @@ def async_image_generate(gen_ai_endpoint, version, environment, application_name

  return wrapper

- def async_image_variatons(gen_ai_endpoint, version, environment, application_name,
- tracer, pricing_info, trace_content, metrics, disable_metrics):
+ def async_image_variatons(version, environment, application_name,
+ tracer, pricing_info, trace_content, metrics, disable_metrics):
  """
  Generates a telemetry wrapper for creating image variations to collect metrics.
-
+
  Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
  version: Version of the monitoring package.
  environment: Deployment environment (e.g., production, staging).
  application_name: Name of the application using the OpenAI API.
  tracer: OpenTelemetry tracer for creating spans.
  pricing_info: Information used for calculating the cost of generating image variations.
  trace_content: Flag indicating whether to trace the input image and generated variations.
-
+
  Returns:
  A function that wraps the image variations creation method to add telemetry.
  """
@@ -794,8 +787,16 @@ def async_image_variatons(gen_ai_endpoint, version, environment, application_nam
  The response from the original 'images.create.variations' method.
  """

- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+ server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+ request_model = kwargs.get("model", "dall-e-2")
+
+ span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+
+ with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+ start_time = time.time()
  response = await wrapped(*args, **kwargs)
+ end_time = time.time()
+
  images_count = 0

  try:
@@ -806,34 +807,45 @@ def async_image_variatons(gen_ai_endpoint, version, environment, application_nam
  image = "url"

  # Calculate cost of the operation
- cost = get_image_model_cost(kwargs.get("model", "dall-e-2"), pricing_info,
+ cost = get_image_model_cost(request_model, pricing_info,
  kwargs.get("size", "1024x1024"), "standard")

  for items in response.data:
- # Set Span attributes
+ # Set Span attributes (OTel Semconv)
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+ span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+ SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
  span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
  SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
- SemanticConvetion.GEN_AI_TYPE_IMAGE)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ server_address)
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
+ server_port)
  span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
  response.created)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ "image")
+
+ # Set Span attributes (Extras)
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
  environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+ span.set_attribute(SERVICE_NAME,
  application_name)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get("model", "dall-e-2"))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
- kwargs.get("user", ""))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
  kwargs.get("size", "1024x1024"))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
  "standard")
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+ kwargs.get("user", ""))
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)
+
  if trace_content:
- attribute_name = f"gen_ai.response.image.{images_count}"
+ attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
  span.add_event(
  name=attribute_name,
  attributes={
@@ -848,21 +860,20 @@ def async_image_variatons(gen_ai_endpoint, version, environment, application_nam
  span.set_status(Status(StatusCode.OK))

  if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_TYPE:
- SemanticConvetion.GEN_AI_TYPE_IMAGE,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get("model", "dall-e-2")
- }
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
+ system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+ request_model=request_model,
+ server_address=server_address,
+ server_port=server_port,
+ response_model=request_model,
+ )

+ metrics["genai_client_operation_duration"].record(
+ end_time - start_time, attributes
+ )
  metrics["genai_requests"].add(1, attributes)
  metrics["genai_cost"].record(cost, attributes)

@@ -878,20 +889,19 @@ def async_image_variatons(gen_ai_endpoint, version, environment, application_nam

  return wrapper

- def async_audio_create(gen_ai_endpoint, version, environment, application_name,
- tracer, pricing_info, trace_content, metrics, disable_metrics):
+ def async_audio_create(version, environment, application_name,
+ tracer, pricing_info, trace_content, metrics, disable_metrics):
  """
  Generates a telemetry wrapper for creating speech audio to collect metrics.
-
+
  Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
  version: Version of the monitoring package.
  environment: Deployment environment (e.g., production, staging).
  application_name: Name of the application using the OpenAI API.
  tracer: OpenTelemetry tracer for creating spans.
  pricing_info: Information used for calculating the cost of generating speech audio.
  trace_content: Flag indicating whether to trace the input text and generated audio.
-
+
  Returns:
  A function that wraps the speech audio creation method to add telemetry.
  """
@@ -913,28 +923,42 @@ def async_audio_create(gen_ai_endpoint, version, environment, application_name,
  The response from the original 'audio.speech.create' method.
  """

- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+ server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+ request_model = kwargs.get("model", "tts-1")
+
+ span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO} {request_model}"
+
+ with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+ start_time = time.time()
  response = await wrapped(*args, **kwargs)
+ end_time = time.time()

  try:
  # Calculate cost of the operation
- cost = get_audio_model_cost(kwargs.get("model", "tts-1"),
+ cost = get_audio_model_cost(request_model,
  pricing_info, kwargs.get("input", ""))

  # Set Span attributes
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+ span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+ SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO)
  span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
  SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
- SemanticConvetion.GEN_AI_TYPE_AUDIO)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ server_address)
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
+ server_port)
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ "speech")
+
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
  environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+ span.set_attribute(SERVICE_NAME,
  application_name)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get("model", "tts-1"))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_AUDIO_VOICE,
  kwargs.get("voice", "alloy"))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_AUDIO_RESPONSE_FORMAT,
@@ -943,6 +967,8 @@ def async_audio_create(gen_ai_endpoint, version, environment, application_name,
  kwargs.get("speed", 1))
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
  cost)
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)
  if trace_content:
  span.add_event(
  name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -954,21 +980,20 @@ def async_audio_create(gen_ai_endpoint, version, environment, application_name,
  span.set_status(Status(StatusCode.OK))

  if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_TYPE:
- SemanticConvetion.GEN_AI_TYPE_AUDIO,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get("model", "tts-1")
- }
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO,
+ system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+ request_model=request_model,
+ server_address=server_address,
+ server_port=server_port,
+ response_model=request_model,
+ )

+ metrics["genai_client_operation_duration"].record(
+ end_time - start_time, attributes
+ )
  metrics["genai_requests"].add(1, attributes)
  metrics["genai_cost"].record(cost, attributes)
999