openlit 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,840 @@
+ # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches
+ """
+ Module for monitoring Azure OpenAI API calls.
+ """
+
+ import logging
+ from opentelemetry.trace import SpanKind, Status, StatusCode
+ from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+ from openlit.__helpers import get_chat_model_cost, get_embed_model_cost
+ from openlit.__helpers import get_image_model_cost, openai_tokens, handle_exception
+ from openlit.semcov import SemanticConvetion
+
+ # Initialize logger for logging potential issues and operations
+ logger = logging.getLogger(__name__)
+
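+ # NOTE: Each factory below returns a wrapper with the
+ # (wrapped, instance, args, kwargs) signature, i.e. the calling convention
+ # that `wrapt.wrap_function_wrapper` expects; a hedged sketch of how such a
+ # wrapper might be attached to a client method is given at the end of this
+ # module.
+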
+ def azure_chat_completions(gen_ai_endpoint, version, environment, application_name,
+                            tracer, pricing_info, trace_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for chat completions to collect metrics.
+
+     Args:
+         gen_ai_endpoint: Endpoint identifier for logging and tracing.
+         version: Version of the monitoring package.
+         environment: Deployment environment (e.g., production, staging).
+         application_name: Name of the application using the OpenAI API.
+         tracer: OpenTelemetry tracer for creating spans.
+         pricing_info: Information used for calculating the cost of OpenAI usage.
+         trace_content: Flag indicating whether to trace the actual content.
+         metrics: Dictionary of metric instruments used to record usage.
+         disable_metrics: Flag indicating whether metric collection is disabled.
+
+     Returns:
+         A function that wraps the chat completions method to add telemetry.
+     """
+
+     def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the 'chat.completions' API call to add telemetry.
+
+         This collects metrics such as execution time, cost, and token usage, and handles errors
+         gracefully, adding details to the trace for observability.
+
+         Args:
+             wrapped: The original 'chat.completions' method to be wrapped.
+             instance: The instance of the class where the original method is defined.
+             args: Positional arguments for the 'chat.completions' method.
+             kwargs: Keyword arguments for the 'chat.completions' method.
+
+         Returns:
+             The response from the original 'chat.completions' method.
+         """
+
+         # Check if streaming is enabled for the API call
+         streaming = kwargs.get("stream", False)
+
+         # pylint: disable=no-else-return
+         if streaming:
+             # Special handling for streaming response to accommodate the nature of data flow
+             def stream_generator():
+                 # pylint: disable=line-too-long
+                 with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+                     # Placeholder for aggregating streaming response
+                     llmresponse = ""
+
+                     # Loop through streaming events capturing relevant details
+                     for chunk in wrapped(*args, **kwargs):
+                         # Collect message IDs and aggregated response from events
+                         if len(chunk.choices) > 0:
+                             # pylint: disable=line-too-long
+                             if hasattr(chunk.choices[0], "delta") and hasattr(chunk.choices[0].delta, "content"):
+                                 content = chunk.choices[0].delta.content
+                                 if content:
+                                     llmresponse += content
+                         yield chunk
+                         response_id = chunk.id
+                         model = "azure_" + chunk.model
+
+                     # Handle exceptions to ensure observability without disrupting the operation
+                     try:
+                         # Format 'messages' into a single string
+                         message_prompt = kwargs.get("messages", "")
+                         formatted_messages = []
+                         for message in message_prompt:
+                             role = message["role"]
+                             content = message["content"]
+
+                             if isinstance(content, list):
+                                 content_str = ", ".join(
+                                     # pylint: disable=line-too-long
+                                     f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                                     if "type" in item else f'text: {item["text"]}'
+                                     for item in content
+                                 )
+                                 formatted_messages.append(f"{role}: {content_str}")
+                             else:
+                                 formatted_messages.append(f"{role}: {content}")
+                         prompt = "\n".join(formatted_messages)
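+                         # For example, messages=[{"role": "user", "content": "Hi"}]
+                         # formats to the single line "user: Hi"; multimodal list content
+                         # is first flattened to comma-separated "type: value" pairs.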
+
+                         # Calculate tokens using input prompt and aggregated response
+                         prompt_tokens = openai_tokens(prompt,
+                                                       kwargs.get("model", "gpt-3.5-turbo"))
+                         completion_tokens = openai_tokens(llmresponse,
+                                                           kwargs.get("model", "gpt-3.5-turbo"))
+
+                         # Calculate cost of the operation
+                         cost = get_chat_model_cost(model, pricing_info,
+                                                    prompt_tokens, completion_tokens)
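+                         # (The helper presumably applies per-token rates from
+                         # pricing_info for the resolved model; the exact lookup lives
+                         # in openlit.__helpers.get_chat_model_cost.)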
+
+                         # Set Span attributes
+                         span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                         span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                            SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI)
+                         span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                            SemanticConvetion.GEN_AI_TYPE_CHAT)
+                         span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                            gen_ai_endpoint)
+                         span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                            response_id)
+                         span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                            environment)
+                         span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                            application_name)
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                            model)
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                            kwargs.get("user", ""))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOOL_CHOICE,
+                                            kwargs.get("tool_choice", ""))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                            kwargs.get("temperature", 1))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                            kwargs.get("presence_penalty", 0))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                            kwargs.get("frequency_penalty", 0))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                            kwargs.get("seed", ""))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                            True)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                            prompt_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                            completion_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                            prompt_tokens + completion_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                            cost)
+                         if trace_content:
+                             span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                                prompt)
+                             span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                                llmresponse)
+
+                         span.set_status(Status(StatusCode.OK))
+
+                         if disable_metrics is False:
+                             attributes = {
+                                 TELEMETRY_SDK_NAME:
+                                     "openlit",
+                                 SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                     application_name,
+                                 SemanticConvetion.GEN_AI_SYSTEM:
+                                     SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI,
+                                 SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                     environment,
+                                 SemanticConvetion.GEN_AI_TYPE:
+                                     SemanticConvetion.GEN_AI_TYPE_CHAT,
+                                 SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                     model
+                             }
+
+                             metrics["genai_requests"].add(1, attributes)
+                             metrics["genai_total_tokens"].add(prompt_tokens + completion_tokens, attributes)
+                             metrics["genai_completion_tokens"].add(completion_tokens, attributes)
+                             metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                             metrics["genai_cost"].record(cost, attributes)
+
+                     except Exception as e:
+                         handle_exception(span, e)
+                         logger.error("Error in trace creation: %s", e)
+
+             return stream_generator()
+
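+             # Note: with streaming, the span lives inside stream_generator(), so
+             # telemetry is only recorded once the caller fully consumes the
+             # returned generator (e.g. `for chunk in response: ...`).
+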
+         # Handling for non-streaming responses
+         else:
+             # pylint: disable=line-too-long
+             with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+                 response = wrapped(*args, **kwargs)
+
+                 try:
+                     # Find base model from response
+                     model = "azure_" + response.model
+
+                     # Format 'messages' into a single string
+                     message_prompt = kwargs.get("messages", "")
+                     formatted_messages = []
+                     for message in message_prompt:
+                         role = message["role"]
+                         content = message["content"]
+
+                         if isinstance(content, list):
+                             content_str = ", ".join(
+                                 # pylint: disable=line-too-long
+                                 f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
+                                 if "type" in item else f'text: {item["text"]}'
+                                 for item in content
+                             )
+                             formatted_messages.append(f"{role}: {content_str}")
+                         else:
+                             formatted_messages.append(f"{role}: {content}")
+                     prompt = "\n".join(formatted_messages)
+
+                     # Set base span attributes
+                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                        SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI)
+                     span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                        SemanticConvetion.GEN_AI_TYPE_CHAT)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                        gen_ai_endpoint)
+                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                        response.id)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                        environment)
+                     span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                        application_name)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                        model)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                        kwargs.get("user", ""))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOOL_CHOICE,
+                                        kwargs.get("tool_choice", ""))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                        kwargs.get("temperature", 1))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                        kwargs.get("presence_penalty", 0))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                        kwargs.get("frequency_penalty", 0))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                        kwargs.get("seed", ""))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                        False)
+                     if trace_content:
+                         span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                            prompt)
+
+                     # Set span attributes when tools is not passed to the function call
+                     if "tools" not in kwargs:
+                         # Calculate cost of the operation
+                         cost = get_chat_model_cost(model, pricing_info,
+                                                    response.usage.prompt_tokens,
+                                                    response.usage.completion_tokens)
+
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                            response.usage.prompt_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                            response.usage.completion_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                            response.usage.total_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                            response.choices[0].finish_reason)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                            cost)
+
+                         # Set span attributes for when n = 1 (default)
+                         if "n" not in kwargs or kwargs["n"] == 1:
+                             if trace_content:
+                                 span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                                    response.choices[0].message.content)
+
+                         # Set span attributes for when n > 1
+                         else:
+                             i = 0
+                             while i < kwargs["n"] and trace_content is True:
+                                 attribute_name = f"gen_ai.content.completion.{i}"
+                                 span.set_attribute(attribute_name,
+                                                    response.choices[i].message.content)
+                                 i += 1
+
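+                             # The loop above records one attribute per choice:
+                             # gen_ai.content.completion.0, gen_ai.content.completion.1, ...
+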
+                         # Return original response
+                         return response
+
+                     # Set span attributes when tools is passed to the function call
+                     elif "tools" in kwargs:
+                         # Calculate cost of the operation
+                         cost = get_chat_model_cost(model, pricing_info,
+                                                    response.usage.prompt_tokens,
+                                                    response.usage.completion_tokens)
+
+                         span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                            "Function called with tools")
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                            response.usage.prompt_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                            response.usage.completion_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                            response.usage.total_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                            cost)
+
+                     span.set_status(Status(StatusCode.OK))
+
+                     if disable_metrics is False:
+                         attributes = {
+                             TELEMETRY_SDK_NAME:
+                                 "openlit",
+                             SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                 application_name,
+                             SemanticConvetion.GEN_AI_SYSTEM:
+                                 SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI,
+                             SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                 environment,
+                             SemanticConvetion.GEN_AI_TYPE:
+                                 SemanticConvetion.GEN_AI_TYPE_CHAT,
+                             SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                 model
+                         }
+
+                         metrics["genai_requests"].add(1, attributes)
+                         metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
+                         metrics["genai_completion_tokens"].add(response.usage.completion_tokens, attributes)
+                         metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
+                         metrics["genai_cost"].record(cost, attributes)
+
+                     # Return original response
+                     return response
+
+                 except Exception as e:
+                     handle_exception(span, e)
+                     logger.error("Error in trace creation: %s", e)
+
+                     # Return original response
+                     return response
+
+     return wrapper
+
+ def azure_completions(gen_ai_endpoint, version, environment, application_name,
+                       tracer, pricing_info, trace_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for completions to collect metrics.
+
+     Args:
+         gen_ai_endpoint: Endpoint identifier for logging and tracing.
+         version: Version of the monitoring package.
+         environment: Deployment environment (e.g., production, staging).
+         application_name: Name of the application using the OpenAI API.
+         tracer: OpenTelemetry tracer for creating spans.
+         pricing_info: Information used for calculating the cost of OpenAI usage.
+         trace_content: Flag indicating whether to trace the actual content.
+         metrics: Dictionary of metric instruments used to record usage.
+         disable_metrics: Flag indicating whether metric collection is disabled.
+
+     Returns:
+         A function that wraps the completions method to add telemetry.
+     """
+
+     def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the 'completions' API call to add telemetry.
+
+         This collects metrics such as execution time, cost, and token usage, and handles errors
+         gracefully, adding details to the trace for observability.
+
+         Args:
+             wrapped: The original 'completions' method to be wrapped.
+             instance: The instance of the class where the original method is defined.
+             args: Positional arguments for the 'completions' method.
+             kwargs: Keyword arguments for the 'completions' method.
+
+         Returns:
+             The response from the original 'completions' method.
+         """
+
+         # Check if streaming is enabled for the API call
+         streaming = kwargs.get("stream", False)
+
+         # pylint: disable=no-else-return
+         if streaming:
+             # Special handling for streaming response to accommodate the nature of data flow
+             def stream_generator():
+                 # pylint: disable=line-too-long
+                 with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+                     # Placeholder for aggregating streaming response
+                     llmresponse = ""
+
+                     # Loop through streaming events capturing relevant details
+                     for chunk in wrapped(*args, **kwargs):
+                         # Collect message IDs and aggregated response from events
+                         if len(chunk.choices) > 0:
+                             if hasattr(chunk.choices[0], "text"):
+                                 content = chunk.choices[0].text
+                                 if content:
+                                     llmresponse += content
+                         yield chunk
+                         response_id = chunk.id
+                         model = "azure_" + chunk.model
+
+                     # Handle exceptions to ensure observability without disrupting the operation
+                     try:
+                         prompt = kwargs.get("prompt", "")
+
+                         # Calculate tokens using input prompt and aggregated response
+                         prompt_tokens = openai_tokens(prompt, "gpt-3.5-turbo")
+                         completion_tokens = openai_tokens(llmresponse, "gpt-3.5-turbo")
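+                         # Note: this legacy completions path always tokenizes with the
+                         # "gpt-3.5-turbo" encoding, so token counts are approximate for
+                         # other deployed models.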
+
+                         # Calculate cost of the operation
+                         cost = get_chat_model_cost(model, pricing_info,
+                                                    prompt_tokens, completion_tokens)
+
+                         # Set Span attributes
+                         span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                         span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                            SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI)
+                         span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                            SemanticConvetion.GEN_AI_TYPE_CHAT)
+                         span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                            gen_ai_endpoint)
+                         span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                            response_id)
+                         span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                            environment)
+                         span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                            application_name)
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                            model)
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                            kwargs.get("user", ""))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOOL_CHOICE,
+                                            kwargs.get("tool_choice", ""))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                            kwargs.get("temperature", 1))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                            kwargs.get("presence_penalty", 0))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                            kwargs.get("frequency_penalty", 0))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                            kwargs.get("seed", ""))
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                            True)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                            prompt_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                            completion_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                            prompt_tokens + completion_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                            cost)
+                         if trace_content:
+                             span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                                prompt)
+                             span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                                llmresponse)
+
+                         span.set_status(Status(StatusCode.OK))
+
+                         if disable_metrics is False:
+                             attributes = {
+                                 TELEMETRY_SDK_NAME:
+                                     "openlit",
+                                 SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                     application_name,
+                                 SemanticConvetion.GEN_AI_SYSTEM:
+                                     SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI,
+                                 SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                     environment,
+                                 SemanticConvetion.GEN_AI_TYPE:
+                                     SemanticConvetion.GEN_AI_TYPE_CHAT,
+                                 SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                     model
+                             }
+
+                             metrics["genai_requests"].add(1, attributes)
+                             metrics["genai_total_tokens"].add(prompt_tokens + completion_tokens, attributes)
+                             metrics["genai_completion_tokens"].add(completion_tokens, attributes)
+                             metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                             metrics["genai_cost"].record(cost, attributes)
+
+                     except Exception as e:
+                         handle_exception(span, e)
+                         logger.error("Error in trace creation: %s", e)
+
+             return stream_generator()
+
+         # Handling for non-streaming responses
+         else:
+             # pylint: disable=line-too-long
+             with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+                 response = wrapped(*args, **kwargs)
+
+                 try:
+                     # Find base model from response
+                     model = "azure_" + response.model
+
+                     # Set base span attributes
+                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                        SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI)
+                     span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                        SemanticConvetion.GEN_AI_TYPE_CHAT)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                        gen_ai_endpoint)
+                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                        response.id)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                        environment)
+                     span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                        application_name)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                        model)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                        kwargs.get("user", ""))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOOL_CHOICE,
+                                        kwargs.get("tool_choice", ""))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
+                                        kwargs.get("temperature", 1))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
+                                        kwargs.get("presence_penalty", 0))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+                                        kwargs.get("frequency_penalty", 0))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
+                                        kwargs.get("seed", ""))
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                        False)
+                     if trace_content:
+                         span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                            kwargs.get("prompt", ""))
+
+                     # Set span attributes when tools is not passed to the function call
+                     if "tools" not in kwargs:
+                         # Calculate cost of the operation
+                         cost = get_chat_model_cost(model, pricing_info,
+                                                    response.usage.prompt_tokens,
+                                                    response.usage.completion_tokens)
+
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                            response.usage.prompt_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                            response.usage.completion_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                            response.usage.total_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
+                                            response.choices[0].finish_reason)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                            cost)
+
+                         # Set span attributes for when n = 1 (default)
+                         if "n" not in kwargs or kwargs["n"] == 1:
+                             if trace_content:
+                                 span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                                    response.choices[0].text)
+
+                         # Set span attributes for when n > 1
+                         else:
+                             i = 0
+                             while i < kwargs["n"] and trace_content is True:
+                                 attribute_name = f"gen_ai.content.completion.{i}"
+                                 span.set_attribute(attribute_name,
+                                                    response.choices[i].text)
+                                 i += 1
+                         return response
+
+                     # Set span attributes when tools is passed to the function call
+                     elif "tools" in kwargs:
+                         # Calculate cost of the operation
+                         cost = get_chat_model_cost(model, pricing_info,
+                                                    response.usage.prompt_tokens,
+                                                    response.usage.completion_tokens)
+
+                         span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                            "Function called with tools")
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                            response.usage.prompt_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                            response.usage.completion_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                            response.usage.total_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                            cost)
+
+                     span.set_status(Status(StatusCode.OK))
+
+                     if disable_metrics is False:
+                         attributes = {
+                             TELEMETRY_SDK_NAME:
+                                 "openlit",
+                             SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                 application_name,
+                             SemanticConvetion.GEN_AI_SYSTEM:
+                                 SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI,
+                             SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                 environment,
+                             SemanticConvetion.GEN_AI_TYPE:
+                                 SemanticConvetion.GEN_AI_TYPE_CHAT,
+                             SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                 model
+                         }
+
+                         metrics["genai_requests"].add(1, attributes)
+                         metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
+                         metrics["genai_completion_tokens"].add(response.usage.completion_tokens, attributes)
+                         metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
+                         metrics["genai_cost"].record(cost, attributes)
+
+                     # Return original response
+                     return response
+
+                 except Exception as e:
+                     handle_exception(span, e)
+                     logger.error("Error in trace creation: %s", e)
+
+                     # Return original response
+                     return response
+
+     return wrapper
+
+ def azure_embedding(gen_ai_endpoint, version, environment, application_name,
+                     tracer, pricing_info, trace_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for embeddings to collect metrics.
+
+     Args:
+         gen_ai_endpoint: Endpoint identifier for logging and tracing.
+         version: Version of the monitoring package.
+         environment: Deployment environment (e.g., production, staging).
+         application_name: Name of the application using the OpenAI API.
+         tracer: OpenTelemetry tracer for creating spans.
+         pricing_info: Information used for calculating the cost of OpenAI usage.
+         trace_content: Flag indicating whether to trace the actual content.
+         metrics: Dictionary of metric instruments used to record usage.
+         disable_metrics: Flag indicating whether metric collection is disabled.
+
+     Returns:
+         A function that wraps the embeddings method to add telemetry.
+     """
+
+     def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the 'embeddings' API call to add telemetry.
+
+         This collects metrics such as execution time, cost, and token usage, and handles errors
+         gracefully, adding details to the trace for observability.
+
+         Args:
+             wrapped: The original 'embeddings' method to be wrapped.
+             instance: The instance of the class where the original method is defined.
+             args: Positional arguments for the 'embeddings' method.
+             kwargs: Keyword arguments for the 'embeddings' method.
+
+         Returns:
+             The response from the original 'embeddings' method.
+         """
+
+         with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+             response = wrapped(*args, **kwargs)
+
+             try:
+                 # Calculate cost of the operation
+                 cost = get_embed_model_cost("azure_" + response.model,
+                                             pricing_info, response.usage.prompt_tokens)
+
+                 # Set Span attributes
+                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                    SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI)
+                 span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                    SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
+                 span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                    gen_ai_endpoint)
+                 span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                    environment)
+                 span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                    application_name)
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                    "azure_" + response.model)
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT,
+                                    kwargs.get("encoding_format", "float"))
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION,
+                                    kwargs.get("dimensions", ""))
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                    kwargs.get("user", ""))
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                    response.usage.prompt_tokens)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                    response.usage.total_tokens)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                    cost)
+                 if trace_content:
+                     span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                        kwargs.get("input", ""))
+
+                 span.set_status(Status(StatusCode.OK))
+
+                 if disable_metrics is False:
+                     attributes = {
+                         TELEMETRY_SDK_NAME:
+                             "openlit",
+                         SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                             application_name,
+                         SemanticConvetion.GEN_AI_SYSTEM:
+                             SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI,
+                         SemanticConvetion.GEN_AI_ENVIRONMENT:
+                             environment,
+                         SemanticConvetion.GEN_AI_TYPE:
+                             SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
+                         SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                             "azure_" + response.model
+                     }
+
+                     metrics["genai_requests"].add(1, attributes)
+                     metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
+                     metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
+                     metrics["genai_cost"].record(cost, attributes)
+
+                 # Return original response
+                 return response
+
+             except Exception as e:
+                 handle_exception(span, e)
+                 logger.error("Error in trace creation: %s", e)
+
+                 # Return original response
+                 return response
+
+     return wrapper
+
+ def azure_image_generate(gen_ai_endpoint, version, environment, application_name,
+                          tracer, pricing_info, trace_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for image generation to collect metrics.
+
+     Args:
+         gen_ai_endpoint: Endpoint identifier for logging and tracing.
+         version: Version of the monitoring package.
+         environment: Deployment environment (e.g., production, staging).
+         application_name: Name of the application using the OpenAI API.
+         tracer: OpenTelemetry tracer for creating spans.
+         pricing_info: Information used for calculating the cost of OpenAI image generation.
+         trace_content: Flag indicating whether to trace the input prompt and generated images.
+         metrics: Dictionary of metric instruments used to record usage.
+         disable_metrics: Flag indicating whether metric collection is disabled.
+
+     Returns:
+         A function that wraps the image generation method to add telemetry.
+     """
+
+     def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the 'images.generate' API call to add telemetry.
+
+         This collects metrics such as execution time and cost, and handles errors
+         gracefully, adding details to the trace for observability.
+
+         Args:
+             wrapped: The original 'images.generate' method to be wrapped.
+             instance: The instance of the class where the original method is defined.
+             args: Positional arguments for the 'images.generate' method.
+             kwargs: Keyword arguments for the 'images.generate' method.
+
+         Returns:
+             The response from the original 'images.generate' method.
+         """
+
+         with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+             response = wrapped(*args, **kwargs)
+             images_count = 0
+
+             try:
+                 # Find image format
+                 if "response_format" in kwargs and kwargs["response_format"] == "b64_json":
+                     image = "b64_json"
+                 else:
+                     image = "url"
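+                 # `image` names the attribute read off each image object below
+                 # via getattr(items, image): either .b64_json or .url.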
+
+                 # Calculate cost of the operation
+                 cost = get_image_model_cost("azure_" + kwargs.get("model", "dall-e-3"),
+                                             pricing_info, kwargs.get("size", "1024x1024"),
+                                             kwargs.get("quality", "standard"))
+
+                 for items in response.data:
+                     # Set Span attributes
+                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                        SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI)
+                     span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                        SemanticConvetion.GEN_AI_TYPE_IMAGE)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                        gen_ai_endpoint)
+                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
+                                        response.created)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                        environment)
+                     span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                        application_name)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                        "azure_" + kwargs.get("model", "dall-e-3"))
+                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_IMAGE_SIZE,
+                                        kwargs.get("size", "1024x1024"))
+                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_IMAGE_QUALITY,
+                                        kwargs.get("quality", "standard"))
+                     span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_IMAGE_STYLE,
+                                        kwargs.get("style", "vivid"))
+                     span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_REVISED_PROMPT,
+                                        items.revised_prompt if items.revised_prompt else "")
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
+                                        kwargs.get("user", ""))
+                     if trace_content:
+                         span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                            kwargs.get("prompt", ""))
+
+                         attribute_name = f"gen_ai.response.image.{images_count}"
+                         span.set_attribute(attribute_name,
+                                            getattr(items, image))
+
+                     images_count += 1
+
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                    len(response.data) * cost)
+                 span.set_status(Status(StatusCode.OK))
+
+                 if disable_metrics is False:
+                     attributes = {
+                         TELEMETRY_SDK_NAME:
+                             "openlit",
+                         SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                             application_name,
+                         SemanticConvetion.GEN_AI_SYSTEM:
+                             SemanticConvetion.GEN_AI_SYSTEM_AZURE_OPENAI,
+                         SemanticConvetion.GEN_AI_ENVIRONMENT:
+                             environment,
+                         SemanticConvetion.GEN_AI_TYPE:
+                             SemanticConvetion.GEN_AI_TYPE_IMAGE,
+                         SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                             "azure_" + kwargs.get("model", "dall-e-3")
+                     }
+
+                     metrics["genai_requests"].add(1, attributes)
+                     metrics["genai_cost"].record(cost, attributes)
+
+                 # Return original response
+                 return response
+
+             except Exception as e:
+                 handle_exception(span, e)
+                 logger.error("Error in trace creation: %s", e)
+
+                 # Return original response
+                 return response
+
+     return wrapper
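+
+ # A hedged usage sketch (illustrative, not an API confirmed by this module):
+ # an instrumentor could attach these factories with `wrapt`, e.g.
+ #
+ #   import wrapt
+ #
+ #   wrapt.wrap_function_wrapper(
+ #       "openai.resources.chat.completions", "Completions.create",
+ #       azure_chat_completions("azure_openai.chat.completions", "0.0.1",
+ #                              "production", "my-app", tracer, pricing_info,
+ #                              True, metrics, False))
+ #
+ # The module path, endpoint string, and the tracer/pricing_info/metrics objects
+ # above are assumptions for illustration; streaming calls are traced the same
+ # way, with telemetry emitted once the returned generator is exhausted.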