openlit 1.33.7__py3-none-any.whl → 1.33.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. openlit/__helpers.py +83 -0
  2. openlit/__init__.py +1 -1
  3. openlit/instrumentation/ag2/ag2.py +2 -2
  4. openlit/instrumentation/ai21/__init__.py +4 -4
  5. openlit/instrumentation/ai21/ai21.py +370 -319
  6. openlit/instrumentation/ai21/async_ai21.py +371 -319
  7. openlit/instrumentation/anthropic/__init__.py +4 -4
  8. openlit/instrumentation/anthropic/anthropic.py +321 -189
  9. openlit/instrumentation/anthropic/async_anthropic.py +323 -190
  10. openlit/instrumentation/assemblyai/__init__.py +1 -1
  11. openlit/instrumentation/assemblyai/assemblyai.py +59 -43
  12. openlit/instrumentation/astra/astra.py +4 -4
  13. openlit/instrumentation/astra/async_astra.py +4 -4
  14. openlit/instrumentation/azure_ai_inference/__init__.py +4 -4
  15. openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +406 -252
  16. openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +406 -252
  17. openlit/instrumentation/bedrock/__init__.py +1 -1
  18. openlit/instrumentation/bedrock/bedrock.py +115 -58
  19. openlit/instrumentation/chroma/chroma.py +4 -4
  20. openlit/instrumentation/cohere/__init__.py +33 -10
  21. openlit/instrumentation/cohere/async_cohere.py +610 -0
  22. openlit/instrumentation/cohere/cohere.py +410 -219
  23. openlit/instrumentation/controlflow/controlflow.py +2 -2
  24. openlit/instrumentation/crawl4ai/async_crawl4ai.py +2 -2
  25. openlit/instrumentation/crawl4ai/crawl4ai.py +2 -2
  26. openlit/instrumentation/crewai/crewai.py +2 -2
  27. openlit/instrumentation/dynamiq/dynamiq.py +2 -2
  28. openlit/instrumentation/elevenlabs/async_elevenlabs.py +73 -47
  29. openlit/instrumentation/elevenlabs/elevenlabs.py +73 -52
  30. openlit/instrumentation/embedchain/embedchain.py +4 -4
  31. openlit/instrumentation/firecrawl/firecrawl.py +2 -2
  32. openlit/instrumentation/google_ai_studio/__init__.py +9 -9
  33. openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +183 -219
  34. openlit/instrumentation/google_ai_studio/google_ai_studio.py +183 -220
  35. openlit/instrumentation/gpt4all/gpt4all.py +17 -17
  36. openlit/instrumentation/groq/async_groq.py +14 -14
  37. openlit/instrumentation/groq/groq.py +14 -14
  38. openlit/instrumentation/haystack/haystack.py +2 -2
  39. openlit/instrumentation/julep/async_julep.py +2 -2
  40. openlit/instrumentation/julep/julep.py +2 -2
  41. openlit/instrumentation/langchain/langchain.py +36 -31
  42. openlit/instrumentation/letta/letta.py +6 -6
  43. openlit/instrumentation/litellm/async_litellm.py +20 -20
  44. openlit/instrumentation/litellm/litellm.py +20 -20
  45. openlit/instrumentation/llamaindex/llamaindex.py +2 -2
  46. openlit/instrumentation/mem0/mem0.py +2 -2
  47. openlit/instrumentation/milvus/milvus.py +4 -4
  48. openlit/instrumentation/mistral/async_mistral.py +18 -18
  49. openlit/instrumentation/mistral/mistral.py +18 -18
  50. openlit/instrumentation/multion/async_multion.py +2 -2
  51. openlit/instrumentation/multion/multion.py +2 -2
  52. openlit/instrumentation/ollama/async_ollama.py +29 -29
  53. openlit/instrumentation/ollama/ollama.py +29 -29
  54. openlit/instrumentation/openai/__init__.py +11 -230
  55. openlit/instrumentation/openai/async_openai.py +434 -409
  56. openlit/instrumentation/openai/openai.py +415 -393
  57. openlit/instrumentation/phidata/phidata.py +2 -2
  58. openlit/instrumentation/pinecone/pinecone.py +4 -4
  59. openlit/instrumentation/premai/premai.py +20 -20
  60. openlit/instrumentation/qdrant/async_qdrant.py +4 -4
  61. openlit/instrumentation/qdrant/qdrant.py +4 -4
  62. openlit/instrumentation/reka/async_reka.py +6 -6
  63. openlit/instrumentation/reka/reka.py +6 -6
  64. openlit/instrumentation/together/async_together.py +18 -18
  65. openlit/instrumentation/together/together.py +18 -18
  66. openlit/instrumentation/transformers/transformers.py +6 -6
  67. openlit/instrumentation/vertexai/async_vertexai.py +53 -53
  68. openlit/instrumentation/vertexai/vertexai.py +53 -53
  69. openlit/instrumentation/vllm/vllm.py +6 -6
  70. openlit/otel/metrics.py +98 -7
  71. openlit/semcov/__init__.py +113 -80
  72. {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/METADATA +2 -1
  73. openlit-1.33.9.dist-info/RECORD +121 -0
  74. {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/WHEEL +1 -1
  75. openlit/instrumentation/openai/async_azure_openai.py +0 -900
  76. openlit/instrumentation/openai/azure_openai.py +0 -898
  77. openlit-1.33.7.dist-info/RECORD +0 -122
  78. {openlit-1.33.7.dist-info → openlit-1.33.9.dist-info}/LICENSE +0 -0
@@ -1,11 +1,11 @@
- # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches
  """
  Module for monitoring OpenAI API calls.
  """

  import logging
+ import time
  from opentelemetry.trace import SpanKind, Status, StatusCode
- from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+ from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
  from openlit.__helpers import (
  get_chat_model_cost,
  get_embed_model_cost,
@@ -14,19 +14,22 @@ from openlit.__helpers import (
  openai_tokens,
  handle_exception,
  response_as_dict,
+ calculate_ttft,
+ calculate_tbt,
+ create_metrics_attributes,
+ set_server_address_and_port
  )
  from openlit.semcov import SemanticConvetion

  # Initialize logger for logging potential issues and operations
  logger = logging.getLogger(__name__)

- def chat_completions(gen_ai_endpoint, version, environment, application_name,
+ def chat_completions(version, environment, application_name,
  tracer, pricing_info, trace_content, metrics, disable_metrics):
  """
  Generates a telemetry wrapper for chat completions to collect metrics.

  Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
  version: Version of the monitoring package.
  environment: Deployment environment (e.g., production, staging).
  application_name: Name of the application using the OpenAI API.
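The four helpers newly imported here come from openlit/__helpers.py (+83 lines in this release), whose diff is not shown on this page. A minimal sketch of the two timing helpers, with signatures inferred from the call sites later in this file — treat the bodies as assumptions, not the shipped implementation:

```python
# Hypothetical sketch of the timing helpers imported above; the real bodies
# live in openlit/__helpers.py and are not part of this file's diff.

def calculate_ttft(timestamps: list, start_time: float) -> float:
    """Time-to-first-token: delay between request start and the first chunk."""
    if timestamps:
        return timestamps[0] - start_time
    return 0.0

def calculate_tbt(timestamps: list) -> float:
    """Time-between-tokens: average gap between consecutive chunk arrivals."""
    if len(timestamps) > 1:
        gaps = [t2 - t1 for t1, t2 in zip(timestamps, timestamps[1:])]
        return sum(gaps) / len(gaps)
    return 0.0
```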
@@ -54,6 +57,8 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
  wrapped,
  span,
  kwargs,
+ server_address,
+ server_port,
  **args,
  ):
  self.__wrapped__ = wrapped
@@ -61,9 +66,20 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
  # Placeholder for aggregating streaming response
  self._llmresponse = ""
  self._response_id = ""
+ self._response_model = ""
+ self._finish_reason = ""
+ self._openai_response_service_tier = ""
+ self._openai_system_fingerprint = ""

  self._args = args
  self._kwargs = kwargs
+ self._start_time = time.time()
+ self._end_time = None
+ self._timestamps = []
+ self._ttft = 0
+ self._tbt = 0
+ self._server_address = server_address
+ self._server_port = server_port

  def __enter__(self):
  self.__wrapped__.__enter__()
@@ -82,6 +98,14 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
  def __next__(self):
  try:
  chunk = self.__wrapped__.__next__()
+ end_time = time.time()
+ # Record the timestamp for the current chunk
+ self._timestamps.append(end_time)
+
+ if len(self._timestamps) == 1:
+ # Calculate time to first chunk
+ self._ttft = calculate_ttft(self._timestamps, self._start_time)
+
  chunked = response_as_dict(chunk)
  # Collect message IDs and aggregated response from events
  if (len(chunked.get('choices')) > 0 and ('delta' in chunked.get('choices')[0] and
@@ -91,10 +115,18 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
  if content:
  self._llmresponse += content
  self._response_id = chunked.get('id')
+ self._response_model = chunked.get('model')
+ self._finish_reason = chunked.get('choices')[0].get('finish_reason')
+ self._openai_response_service_tier = chunked.get('service_tier')
+ self._openai_system_fingerprint = chunked.get('system_fingerprint')
  return chunk
  except StopIteration:
  # Handling exception ensure observability without disrupting operation
  try:
+ self._end_time = time.time()
+ if len(self._timestamps) > 1:
+ self._tbt = calculate_tbt(self._timestamps)
+
  # Format 'messages' into a single string
  message_prompt = self._kwargs.get("messages", "")
  formatted_messages = []
@@ -109,7 +141,6 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
  content_str_list.append(f'text: {item["text"]}')
  elif (item["type"] == "image_url" and
  not item["image_url"]["url"].startswith("data:")):
- # pylint: disable=line-too-long
  content_str_list.append(f'image_url: {item["image_url"]["url"]}')
  content_str = ", ".join(content_str_list)
  formatted_messages.append(f"{role}: {content_str}")
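For reference, the flattening the wrapper applies to the `messages` payload before token counting can be reproduced standalone; a small sketch assuming an OpenAI-style payload with text and image parts (data: URLs are skipped, as above):

```python
# Standalone rendition of the message flattening performed above.
messages = [
    {"role": "system", "content": "You are concise."},
    {"role": "user", "content": [
        {"type": "text", "text": "Describe this image"},
        {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
    ]},
]

formatted = []
for message in messages:
    role = message["role"]
    content = message["content"]
    if isinstance(content, list):
        parts = []
        for item in content:
            if item["type"] == "text":
                parts.append(f'text: {item["text"]}')
            elif (item["type"] == "image_url"
                  and not item["image_url"]["url"].startswith("data:")):
                parts.append(f'image_url: {item["image_url"]["url"]}')
        formatted.append(f'{role}: {", ".join(parts)}')
    else:
        formatted.append(f"{role}: {content}")

prompt = "\n".join(formatted)
# system: You are concise.
# user: text: Describe this image, image_url: https://example.com/cat.png
```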
@@ -117,57 +148,87 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
  formatted_messages.append(f"{role}: {content}")
  prompt = "\n".join(formatted_messages)

+ request_model = self._kwargs.get("model", "gpt-4o")
+
  # Calculate tokens using input prompt and aggregated response
- prompt_tokens = openai_tokens(prompt,
- self._kwargs.get("model", "gpt-3.5-turbo"))
- completion_tokens = openai_tokens(self._llmresponse,
- self._kwargs.get("model", "gpt-3.5-turbo"))
+ input_tokens = openai_tokens(prompt,
+ request_model)
+ output_tokens = openai_tokens(self._llmresponse,
+ request_model)

  # Calculate cost of the operation
- cost = get_chat_model_cost(self._kwargs.get("model", "gpt-3.5-turbo"),
- pricing_info, prompt_tokens,
- completion_tokens)
+ cost = get_chat_model_cost(request_model,
+ pricing_info, input_tokens,
+ output_tokens)

- # Set Span attributes
+ # Set Span attributes (OTel Semconv)
  self._span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+ SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
  self._span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
  SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
- self._span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
- SemanticConvetion.GEN_AI_TYPE_CHAT)
- self._span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+ request_model)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+ self._kwargs.get("seed", ""))
+ self._span.set_attribute(SemanticConvetion.SERVER_PORT,
+ self._server_port)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+ self._kwargs.get("frequency_penalty", 0.0))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+ self._kwargs.get("max_tokens", -1))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+ self._kwargs.get("presence_penalty", 0.0))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+ self._kwargs.get("stop", []))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+ self._kwargs.get("temperature", 1.0))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+ self._kwargs.get("top_p", 1.0))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+ [self._finish_reason])
  self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
  self._response_id)
- self._span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+ self._span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ self._response_model)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+ input_tokens)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+ output_tokens)
+ self._span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ self._server_address)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_REQUEST_SERVICE_TIER,
+ self._kwargs.get("service_tier", "auto"))
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER,
+ self._openai_response_service_tier)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_RESPONSE_SYSTEM_FINGERPRINT,
+ self._openai_system_fingerprint)
+ if isinstance(self._llmresponse, str):
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ "text")
+ else:
+ self._span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ "json")
+
+ # Set Span attributes (Extra)
+ self._span.set_attribute(DEPLOYMENT_ENVIRONMENT,
  environment)
- self._span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+ self._span.set_attribute(SERVICE_NAME,
  application_name)
- self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- self._kwargs.get("model", "gpt-3.5-turbo"))
  self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
  self._kwargs.get("user", ""))
- self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
- self._kwargs.get("top_p", 1.0))
- self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
- self._kwargs.get("max_tokens", -1))
- self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
- self._kwargs.get("temperature", 1.0))
- self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
- self._kwargs.get("presence_penalty", 0.0))
- self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
- self._kwargs.get("frequency_penalty", 0.0))
- self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
- self._kwargs.get("seed", ""))
  self._span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
  True)
- self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
- prompt_tokens)
- self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
- completion_tokens)
  self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- prompt_tokens + completion_tokens)
+ input_tokens + output_tokens)
  self._span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
  cost)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TBT,
+ self._tbt)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+ self._ttft)
+ self._span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)
  if trace_content:
  self._span.add_event(
  name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -181,31 +242,35 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
  SemanticConvetion.GEN_AI_CONTENT_COMPLETION: self._llmresponse,
  },
  )
-
  self._span.set_status(Status(StatusCode.OK))

  if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_TYPE:
- SemanticConvetion.GEN_AI_TYPE_CHAT,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- self._kwargs.get("model", "gpt-3.5-turbo")
- }
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+ system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+ request_model=request_model,
+ server_address=self._server_address,
+ server_port=self._server_port,
+ response_model=self._response_model,
+ )

- metrics["genai_requests"].add(1, attributes)
- metrics["genai_total_tokens"].add(
- prompt_tokens + completion_tokens, attributes
+ metrics["genai_client_usage_tokens"].record(
+ input_tokens + output_tokens, attributes
+ )
+ metrics["genai_client_operation_duration"].record(
+ self._end_time - self._start_time, attributes
+ )
+ metrics["genai_server_tbt"].record(
+ self._tbt, attributes
  )
- metrics["genai_completion_tokens"].add(completion_tokens, attributes)
- metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+ metrics["genai_server_ttft"].record(
+ self._ttft, attributes
+ )
+ metrics["genai_requests"].add(1, attributes)
+ metrics["genai_completion_tokens"].add(output_tokens, attributes)
+ metrics["genai_prompt_tokens"].add(input_tokens, attributes)
  metrics["genai_cost"].record(cost, attributes)

  except Exception as e:
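The inline metrics attribute dictionaries are replaced by `create_metrics_attributes`, also defined in openlit/__helpers.py and not shown in this diff. A plausible sketch, assuming the attribute keys follow the OTel GenAI semantic conventions used above:

```python
# Hypothetical sketch of create_metrics_attributes; the shipped version is
# in openlit/__helpers.py. Attribute keys are assumptions based on the
# semantic-convention names referenced in this file.
from opentelemetry.sdk.resources import (
    SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT,
)

def create_metrics_attributes(service_name, deployment_environment, operation,
                              system, request_model, server_address,
                              server_port, response_model):
    return {
        TELEMETRY_SDK_NAME: "openlit",
        SERVICE_NAME: service_name,
        DEPLOYMENT_ENVIRONMENT: deployment_environment,
        "gen_ai.operation.name": operation,
        "gen_ai.system": system,
        "gen_ai.request.model": request_model,
        "server.address": server_address,
        "server.port": server_port,
        "gen_ai.response.model": response_model,
    }
```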
@@ -234,20 +299,25 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,

  # Check if streaming is enabled for the API call
  streaming = kwargs.get("stream", False)
+ server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+ request_model = kwargs.get("model", "gpt-4o")
+
+ span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"

  # pylint: disable=no-else-return
  if streaming:
  # Special handling for streaming response to accommodate the nature of data flow
  awaited_wrapped = wrapped(*args, **kwargs)
- span = tracer.start_span(gen_ai_endpoint, kind=SpanKind.CLIENT)
+ span = tracer.start_span(span_name, kind=SpanKind.CLIENT)

- return TracedSyncStream(awaited_wrapped, span, kwargs)
+ return TracedSyncStream(awaited_wrapped, span, kwargs, server_address, server_port)

  # Handling for non-streaming responses
  else:
- # pylint: disable=line-too-long
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+ with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+ start_time = time.time()
  response = wrapped(*args, **kwargs)
+ end_time = time.time()

  response_dict = response_as_dict(response)

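`set_server_address_and_port` likewise lives in openlit/__helpers.py; judging by the call sites, it resolves the target host and port from the wrapped client and falls back to the supplied defaults. A hypothetical sketch — the attribute access on the client instance is an assumption:

```python
# Hypothetical sketch of set_server_address_and_port (openlit/__helpers.py,
# not shown here). Assumes the wrapped resource exposes its HTTP client's
# base_url via `instance._client.base_url`; the real lookup may differ.
from urllib.parse import urlparse

def set_server_address_and_port(instance, default_address, default_port):
    base_url = getattr(getattr(instance, "_client", None), "base_url", None)
    if base_url:
        parsed = urlparse(str(base_url))
        return parsed.hostname or default_address, parsed.port or default_port
    return default_address, default_port
```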
@@ -261,7 +331,6 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,

  if isinstance(content, list):
  content_str = ", ".join(
- # pylint: disable=line-too-long
  f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
  if "type" in item else f'text: {item["text"]}'
  for item in content
@@ -271,38 +340,72 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
  formatted_messages.append(f"{role}: {content}")
  prompt = "\n".join(formatted_messages)

- # Set base span attribues
+ input_tokens = response_dict.get('usage').get('prompt_tokens')
+ output_tokens = response_dict.get('usage').get('completion_tokens')
+
+ # Calculate cost of the operation
+ cost = get_chat_model_cost(request_model,
+ pricing_info, input_tokens,
+ output_tokens)
+
+ # Set base span attribues (OTel Semconv)
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+ span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+ SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT)
  span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
  SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
- SemanticConvetion.GEN_AI_TYPE_CHAT)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+ kwargs.get("seed", ""))
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
+ server_port)
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+ kwargs.get("frequency_penalty", 0.0))
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
+ kwargs.get("max_tokens", -1))
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+ kwargs.get("presence_penalty", 0.0))
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_STOP_SEQUENCES,
+ kwargs.get("stop", []))
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+ kwargs.get("temperature", 1.0))
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
+ kwargs.get("top_p", 1.0))
  span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
  response_dict.get("id"))
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ response_dict.get('model'))
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+ input_tokens)
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_OUTPUT_TOKENS,
+ output_tokens)
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ server_address)
+ span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_REQUEST_SERVICE_TIER,
+ kwargs.get("service_tier", "auto"))
+ span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER,
+ response_dict.get('service_tier'))
+ span.set_attribute(SemanticConvetion.GEN_AI_OPENAI_RESPONSE_SYSTEM_FINGERPRINT,
+ response_dict.get('system_fingerprint'))
+
+ # Set base span attribues (Extras)
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
  environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+ span.set_attribute(SERVICE_NAME,
  application_name)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get("model", "gpt-3.5-turbo"))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
- kwargs.get("top_p", 1.0))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
- kwargs.get("max_tokens", -1))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
  kwargs.get("user", ""))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
- kwargs.get("temperature", 1.0))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
- kwargs.get("presence_penalty", 0.0))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
- kwargs.get("frequency_penalty", 0.0))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
- kwargs.get("seed", ""))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
  False)
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+ input_tokens + output_tokens)
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+ cost)
+ span.set_attribute(SemanticConvetion.GEN_AI_SERVER_TTFT,
+ end_time - start_time)
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)
  if trace_content:
  span.add_event(
  name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -311,93 +414,54 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,
  },
  )

- # Set span attributes when tools is not passed to the function call
- if "tools" not in kwargs:
- # Calculate cost of the operation
- cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
- pricing_info, response_dict.get('usage', {}).get('prompt_tokens', None),
- response_dict.get('usage', {}).get('completion_tokens', None))
-
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
- response_dict.get('usage', {}).get('prompt_tokens', None))
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
- response_dict.get('usage', {}).get('completion_tokens', None))
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- response_dict.get('usage', {}).get('total_tokens', None))
+ for i in range(kwargs.get('n',1)):
  span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
- [response_dict.get('choices', [])[0].get('finish_reason', None)])
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
- cost)
-
- # Set span attributes for when n = 1 (default)
- if "n" not in kwargs or kwargs["n"] == 1:
- if trace_content:
- span.add_event(
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
- attributes={
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices', [])[0].get("message").get("content"),
- },
- )
-
- # Set span attributes for when n > 0
- else:
- i = 0
- while i < kwargs["n"] and trace_content is True:
- attribute_name = f"gen_ai.content.completion.{i}"
- span.add_event(
- name=attribute_name,
- attributes={
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response_dict.get('choices')[i].get("message").get("content"),
- },
- )
- i += 1
-
- # Return original response
- return response
-
- # Set span attributes when tools is passed to the function call
- elif "tools" in kwargs:
- # Calculate cost of the operation
- cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
- pricing_info, response_dict.get('usage').get('prompt_tokens'),
- response_dict.get('usage').get('completion_tokens'))
- span.add_event(
- name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
- attributes={
- SemanticConvetion.GEN_AI_CONTENT_COMPLETION: "Function called with tools",
- },
- )
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
- response_dict.get('usage').get('prompt_tokens'))
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
- response_dict.get('usage').get('completion_tokens'))
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- response_dict.get('usage').get('total_tokens'))
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
- cost)
+ [response_dict.get('choices')[i].get('finish_reason')])
+ if trace_content:
+ span.add_event(
+ name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+ attributes={
+ # pylint: disable=line-too-long
+ SemanticConvetion.GEN_AI_CONTENT_COMPLETION: str(response_dict.get('choices')[i].get('message').get('content')),
+ },
+ )
+ if kwargs.get('tools'):
+ span.set_attribute(SemanticConvetion.GEN_AI_TOOL_CALLS,
+ str(response_dict.get('choices')[i].get('message').get('tool_calls')))
+
+ if isinstance(response_dict.get('choices')[i].get('message').get('content'), str):
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ "text")
+ elif response_dict.get('choices')[i].get('message').get('content') is not None:
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ "json")

  span.set_status(Status(StatusCode.OK))

  if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_TYPE:
- SemanticConvetion.GEN_AI_TYPE_CHAT,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get("model", "gpt-3.5-turbo")
- }
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_CHAT,
+ system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+ request_model=request_model,
+ server_address=server_address,
+ server_port=server_port,
+ response_model=response_dict.get('model'),
+ )

+ metrics["genai_client_usage_tokens"].record(
+ input_tokens + output_tokens, attributes
+ )
+ metrics["genai_client_operation_duration"].record(
+ end_time - start_time, attributes
+ )
+ metrics["genai_server_ttft"].record(
+ end_time - start_time, attributes
+ )
  metrics["genai_requests"].add(1, attributes)
- metrics["genai_total_tokens"].add(response_dict.get('usage').get('total_tokens'), attributes)
- metrics["genai_completion_tokens"].add(response_dict.get('usage').get('completion_tokens'), attributes)
- metrics["genai_prompt_tokens"].add(response_dict.get('usage').get('prompt_tokens'), attributes)
+ metrics["genai_completion_tokens"].add(output_tokens, attributes)
+ metrics["genai_prompt_tokens"].add(input_tokens, attributes)
  metrics["genai_cost"].record(cost, attributes)

  # Return original response
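The rewritten non-streaming block iterates over all `n` choices, emits one completion event per choice, and records tool calls when `tools` is passed. A hedged usage sketch of the instrumented path (assumes OPENAI_API_KEY is set and an OTel exporter is configured elsewhere):

```python
# Hedged usage sketch: once openlit is initialized, a plain OpenAI call is
# traced by the wrapper above; no extra instrumentation code is needed here.
import openlit
from openai import OpenAI

openlit.init(application_name="demo-app", environment="staging")

client = OpenAI()
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Two short taglines for a bakery"}],
    n=2,  # the loop above emits one completion event per choice
)
for choice in response.choices:
    print(choice.message.content)
```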
@@ -412,13 +476,12 @@ def chat_completions(gen_ai_endpoint, version, environment, application_name,

  return wrapper

- def embedding(gen_ai_endpoint, version, environment, application_name,
+ def embedding(version, environment, application_name,
  tracer, pricing_info, trace_content, metrics, disable_metrics):
  """
  Generates a telemetry wrapper for embeddings to collect metrics.

  Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
  version: Version of the monitoring package.
  environment: Deployment environment (e.g., production, staging).
  application_name: Name of the application using the OpenAI API.
@@ -447,40 +510,56 @@ def embedding(gen_ai_endpoint, version, environment, application_name,
  The response from the original 'embeddings' method.
  """

- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+ server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+ request_model = kwargs.get("model", "text-embedding-ada-002")
+
+ span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING} {request_model}"
+
+ with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+ start_time = time.time()
  response = wrapped(*args, **kwargs)
+ end_time = time.time()
+
  response_dict = response_as_dict(response)
  try:
+ input_tokens = response_dict.get('usage').get('prompt_tokens')
+
  # Calculate cost of the operation
- cost = get_embed_model_cost(kwargs.get("model", "text-embedding-ada-002"),
- pricing_info, response_dict.get('usage').get('prompt_tokens'))
+ cost = get_embed_model_cost(request_model,
+ pricing_info, input_tokens)

- # Set Span attributes
+ # Set Span attributes (OTel Semconv)
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+ span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+ SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING)
  span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
  SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
- SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_ENCODING_FORMATS,
+ [kwargs.get('encoding_format', 'float')])
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ server_address)
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
+ server_port)
+ span.set_attribute(SemanticConvetion.GEN_AI_USAGE_INPUT_TOKENS,
+ input_tokens)
+
+ # Set Span attributes (Extras)
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
  environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+ span.set_attribute(SERVICE_NAME,
  application_name)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get("model", "text-embedding-ada-002"))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
- kwargs.get("encoding_format", "float"))
- # span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
- # kwargs.get("dimensions", "null"))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
  kwargs.get("user", ""))
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
- response_dict.get('usage').get('prompt_tokens'))
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
- response_dict.get('usage').get('total_tokens'))
+ input_tokens)
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
  cost)
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)

  if trace_content:
  span.add_event(
@@ -493,26 +572,24 @@ def embedding(gen_ai_endpoint, version, environment, application_name,
  span.set_status(Status(StatusCode.OK))

  if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_TYPE:
- SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get("model", "text-embedding-ada-002")
- }
-
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_EMBEDDING,
+ system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+ request_model=request_model,
+ server_address=server_address,
+ server_port=server_port,
+ response_model=request_model,
+ )
+ metrics["genai_client_usage_tokens"].record(
+ input_tokens, attributes
+ )
+ metrics["genai_client_operation_duration"].record(
+ end_time - start_time, attributes
+ )
  metrics["genai_requests"].add(1, attributes)
- metrics["genai_total_tokens"].add(
- response_dict.get('usage').get('total_tokens'), attributes)
- metrics["genai_prompt_tokens"].add(
- response_dict.get('usage').get('prompt_tokens'), attributes)
+ metrics["genai_prompt_tokens"].add(input_tokens, attributes)
  metrics["genai_cost"].record(cost, attributes)

  # Return original response
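Embedding spans are now named `embedding {model}` and read input tokens straight from the response usage. A hedged usage sketch, under the same assumptions as the chat example above (openlit.init already called, API key and exporter configured):

```python
# Hedged usage sketch for the instrumented embeddings path.
from openai import OpenAI

client = OpenAI()
result = client.embeddings.create(
    model="text-embedding-ada-002",
    input="The quick brown fox",
    encoding_format="float",  # recorded under gen_ai.request.encoding_formats
)
# The wrapper above records usage.prompt_tokens as both the input and total
# token count on the span, and derives cost from the pricing info.
print(len(result.data[0].embedding))
```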
@@ -527,118 +604,12 @@ def embedding(gen_ai_endpoint, version, environment, application_name,

  return wrapper

- def finetune(gen_ai_endpoint, version, environment, application_name,
- tracer, pricing_info, trace_content, metrics, disable_metrics):
- """
- Generates a telemetry wrapper for fine-tuning jobs to collect metrics.
-
- Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
- version: Version of the monitoring package.
- environment: Deployment environment (e.g., production, staging).
- application_name: Name of the application using the OpenAI API.
- tracer: OpenTelemetry tracer for creating spans.
- pricing_info: Information used for calculating the cost of OpenAI usage.
- trace_content: Flag indicating whether to trace the actual content.
-
- Returns:
- A function that wraps the fine tuning creation method to add telemetry.
- """
-
- def wrapper(wrapped, instance, args, kwargs):
- """
- Wraps the 'fine_tuning.jobs.create' API call to add telemetry.
-
- This collects metrics such as execution time, usage stats, and handles errors
- gracefully, adding details to the trace for observability.
-
- Args:
- wrapped: The original 'fine_tuning.jobs.create' method to be wrapped.
- instance: The instance of the class where the original method is defined.
- args: Positional arguments for the method.
- kwargs: Keyword arguments for the method.
-
- Returns:
- The response from the original 'fine_tuning.jobs.create' method.
- """
-
- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
- response = wrapped(*args, **kwargs)
-
- # Handling exception ensure observability without disrupting operation
- try:
- # Set Span attributes
- span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
- span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
- SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
- SemanticConvetion.GEN_AI_TYPE_FINETUNING)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
- environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
- application_name)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get("model", "gpt-3.5-turbo"))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TRAINING_FILE,
- kwargs.get("training_file", ""))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_VALIDATION_FILE,
- kwargs.get("validation_file", ""))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_BATCH_SIZE,
- kwargs.get("hyperparameters.batch_size", "auto"))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_LRM,
- kwargs.get("hyperparameters.learning_rate_multiplier",
- "auto"))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_EPOCHS,
- kwargs.get("hyperparameters.n_epochs", "auto"))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_SUFFIX,
- kwargs.get("suffix", ""))
- span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
- response.id)
- span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
- response.usage.prompt_tokens)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_STATUS,
- response.status)
- span.set_status(Status(StatusCode.OK))
-
- if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_TYPE:
- SemanticConvetion.GEN_AI_TYPE_FINETUNING,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get("model", "gpt-3.5-turbo")
- }
-
- metrics["genai_requests"].add(1, attributes)
-
- # Return original response
- return response
-
- except Exception as e:
- handle_exception(span, e)
- logger.error("Error in trace creation: %s", e)
-
- # Return original response
- return response
-
- return wrapper
-
- def image_generate(gen_ai_endpoint, version, environment, application_name,
+ def image_generate(version, environment, application_name,
  tracer, pricing_info, trace_content, metrics, disable_metrics):
  """
  Generates a telemetry wrapper for image generation to collect metrics.

  Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
  version: Version of the monitoring package.
  environment: Deployment environment (e.g., production, staging).
  application_name: Name of the application using the OpenAI API.
@@ -667,8 +638,16 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
  The response from the original 'images.generate' method.
  """

- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+ server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+ request_model = kwargs.get("model", "dall-e-2")
+
+ span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+
+ with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+ start_time = time.time()
  response = wrapped(*args, **kwargs)
+ end_time = time.time()
+
  images_count = 0

  try:
@@ -678,28 +657,38 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
  else:
  image = "url"

+ request_model = kwargs.get("model", "dall-e-2")
+
  # Calculate cost of the operation
- cost = get_image_model_cost(kwargs.get("model", "dall-e-2"),
+ cost = get_image_model_cost(request_model,
  pricing_info, kwargs.get("size", "1024x1024"),
  kwargs.get("quality", "standard"))

  for items in response.data:
- # Set Span attributes
+ # Set Span attributes (OTel Semconv)
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+ span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+ SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
  span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
  SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
- SemanticConvetion.GEN_AI_TYPE_IMAGE)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ server_address)
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
+ server_port)
  span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
  response.created)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ "image")
+
+ # Set Span attributes (Extras)
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
  environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+ span.set_attribute(SERVICE_NAME,
  application_name)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get("model", "dall-e-2"))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
  kwargs.get("size", "1024x1024"))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
@@ -710,6 +699,9 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
  items.revised_prompt if items.revised_prompt else "")
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
  kwargs.get("user", ""))
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)
+
  if trace_content:
  span.add_event(
  name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -717,7 +709,7 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
  SemanticConvetion.GEN_AI_CONTENT_PROMPT: kwargs.get("prompt", ""),
  },
  )
- attribute_name = f"gen_ai.response.image.{images_count}"
+ attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
  span.add_event(
  name=attribute_name,
  attributes={
@@ -732,21 +724,20 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,
  span.set_status(Status(StatusCode.OK))

  if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_TYPE:
- SemanticConvetion.GEN_AI_TYPE_IMAGE,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get("model", "dall-e-2")
- }
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
+ system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+ request_model=request_model,
+ server_address=server_address,
+ server_port=server_port,
+ response_model=request_model,
+ )

+ metrics["genai_client_operation_duration"].record(
+ end_time - start_time, attributes
+ )
  metrics["genai_requests"].add(1, attributes)
  metrics["genai_cost"].record(cost, attributes)

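Note the shift from counter-style `.add()` aggregation to histogram-style `.record()` for token usage, operation duration, TBT, and TTFT; the instruments behind these dictionary keys are defined in openlit/otel/metrics.py (+98 −7 in this release). A hedged sketch of how such histograms could be created — names, units, and descriptions are assumptions, not the shipped definitions:

```python
# Hedged sketch of the histogram instruments implied by the .record() calls
# above; openlit/otel/metrics.py may use different names and boundaries.
from opentelemetry import metrics as otel_metrics

meter = otel_metrics.get_meter(__name__, version="1.33.9")

metrics_dict = {
    "genai_client_usage_tokens": meter.create_histogram(
        name="gen_ai.client.token.usage",
        description="Number of input and output tokens used",
        unit="{token}",
    ),
    "genai_client_operation_duration": meter.create_histogram(
        name="gen_ai.client.operation.duration",
        description="GenAI operation duration",
        unit="s",
    ),
    "genai_server_tbt": meter.create_histogram(
        name="gen_ai.server.time_per_output_token",
        description="Time per output token after the first token",
        unit="s",
    ),
    "genai_server_ttft": meter.create_histogram(
        name="gen_ai.server.time_to_first_token",
        description="Time to generate the first token",
        unit="s",
    ),
}
```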
@@ -762,13 +753,12 @@ def image_generate(gen_ai_endpoint, version, environment, application_name,

  return wrapper

- def image_variatons(gen_ai_endpoint, version, environment, application_name,
+ def image_variatons(version, environment, application_name,
  tracer, pricing_info, trace_content, metrics, disable_metrics):
  """
  Generates a telemetry wrapper for creating image variations to collect metrics.

  Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
  version: Version of the monitoring package.
  environment: Deployment environment (e.g., production, staging).
  application_name: Name of the application using the OpenAI API.
@@ -797,8 +787,16 @@ def image_variatons(gen_ai_endpoint, version, environment, application_name,
  The response from the original 'images.create.variations' method.
  """

- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+ server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+ request_model = kwargs.get("model", "dall-e-2")
+
+ span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE} {request_model}"
+
+ with tracer.start_as_current_span(span_name, kind= SpanKind.CLIENT) as span:
+ start_time = time.time()
  response = wrapped(*args, **kwargs)
+ end_time = time.time()
+
  images_count = 0

  try:
@@ -809,34 +807,45 @@ def image_variatons(gen_ai_endpoint, version, environment, application_name,
  image = "url"

  # Calculate cost of the operation
- cost = get_image_model_cost(kwargs.get("model", "dall-e-2"), pricing_info,
+ cost = get_image_model_cost(request_model, pricing_info,
  kwargs.get("size", "1024x1024"), "standard")

  for items in response.data:
- # Set Span attributes
+ # Set Span attributes (OTel Semconv)
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+ span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+ SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE)
  span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
  SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
- SemanticConvetion.GEN_AI_TYPE_IMAGE)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ server_address)
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
+ server_port)
  span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
  response.created)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ "image")
+
+ # Set Span attributes (Extras)
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
  environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+ span.set_attribute(SERVICE_NAME,
  application_name)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get("model", "dall-e-2"))
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
- kwargs.get("user", ""))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_SIZE,
  kwargs.get("size", "1024x1024"))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IMAGE_QUALITY,
  "standard")
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+ kwargs.get("user", ""))
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)
+
  if trace_content:
- attribute_name = f"gen_ai.response.image.{images_count}"
+ attribute_name = f"{SemanticConvetion.GEN_AI_RESPONSE_IMAGE}.{images_count}"
  span.add_event(
  name=attribute_name,
  attributes={
@@ -851,21 +860,20 @@ def image_variatons(gen_ai_endpoint, version, environment, application_name,
  span.set_status(Status(StatusCode.OK))

  if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_TYPE:
- SemanticConvetion.GEN_AI_TYPE_IMAGE,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get("model", "dall-e-2")
- }
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_IMAGE,
+ system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+ request_model=request_model,
+ server_address=server_address,
+ server_port=server_port,
+ response_model=request_model,
+ )

+ metrics["genai_client_operation_duration"].record(
+ end_time - start_time, attributes
+ )
  metrics["genai_requests"].add(1, attributes)
  metrics["genai_cost"].record(cost, attributes)

@@ -881,13 +889,12 @@ def image_variatons(gen_ai_endpoint, version, environment, application_name,

  return wrapper

- def audio_create(gen_ai_endpoint, version, environment, application_name,
+ def audio_create(version, environment, application_name,
  tracer, pricing_info, trace_content, metrics, disable_metrics):
  """
  Generates a telemetry wrapper for creating speech audio to collect metrics.

  Args:
- gen_ai_endpoint: Endpoint identifier for logging and tracing.
  version: Version of the monitoring package.
  environment: Deployment environment (e.g., production, staging).
  application_name: Name of the application using the OpenAI API.
@@ -916,28 +923,42 @@ def audio_create(gen_ai_endpoint, version, environment, application_name,
  The response from the original 'audio.speech.create' method.
  """

- with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+ server_address, server_port = set_server_address_and_port(instance, "api.openai.com", 443)
+ request_model = kwargs.get("model", "tts-1")
+
+ span_name = f"{SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO} {request_model}"
+
+ with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
+ start_time = time.time()
  response = wrapped(*args, **kwargs)
+ end_time = time.time()

  try:
  # Calculate cost of the operation
- cost = get_audio_model_cost(kwargs.get("model", "tts-1"),
+ cost = get_audio_model_cost(request_model,
  pricing_info, kwargs.get("input", ""))

  # Set Span attributes
  span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+ span.set_attribute(SemanticConvetion.GEN_AI_OPERATION,
+ SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO)
  span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
  SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
- span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
- SemanticConvetion.GEN_AI_TYPE_AUDIO)
- span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
- gen_ai_endpoint)
- span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+ span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.SERVER_ADDRESS,
+ server_address)
+ span.set_attribute(SemanticConvetion.SERVER_PORT,
+ server_port)
+ span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_MODEL,
+ request_model)
+ span.set_attribute(SemanticConvetion.GEN_AI_OUTPUT_TYPE,
+ "speech")
+
+ span.set_attribute(DEPLOYMENT_ENVIRONMENT,
  environment)
- span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+ span.set_attribute(SERVICE_NAME,
  application_name)
- span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
- kwargs.get("model", "tts-1"))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_AUDIO_VOICE,
  kwargs.get("voice", "alloy"))
  span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_AUDIO_RESPONSE_FORMAT,
@@ -946,6 +967,8 @@ def audio_create(gen_ai_endpoint, version, environment, application_name,
  kwargs.get("speed", 1))
  span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
  cost)
+ span.set_attribute(SemanticConvetion.GEN_AI_SDK_VERSION,
+ version)
  if trace_content:
  span.add_event(
  name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
@@ -957,21 +980,20 @@ def audio_create(gen_ai_endpoint, version, environment, application_name,
  span.set_status(Status(StatusCode.OK))

  if disable_metrics is False:
- attributes = {
- TELEMETRY_SDK_NAME:
- "openlit",
- SemanticConvetion.GEN_AI_APPLICATION_NAME:
- application_name,
- SemanticConvetion.GEN_AI_SYSTEM:
- SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
- SemanticConvetion.GEN_AI_ENVIRONMENT:
- environment,
- SemanticConvetion.GEN_AI_TYPE:
- SemanticConvetion.GEN_AI_TYPE_AUDIO,
- SemanticConvetion.GEN_AI_REQUEST_MODEL:
- kwargs.get("model", "tts-1")
- }
+ attributes = create_metrics_attributes(
+ service_name=application_name,
+ deployment_environment=environment,
+ operation=SemanticConvetion.GEN_AI_OPERATION_TYPE_AUDIO,
+ system=SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
+ request_model=request_model,
+ server_address=server_address,
+ server_port=server_port,
+ response_model=request_model,
+ )

+ metrics["genai_client_operation_duration"].record(
+ end_time - start_time, attributes
+ )
  metrics["genai_requests"].add(1, attributes)
  metrics["genai_cost"].record(cost, attributes)