openlit 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,891 @@
1
+ # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, too-many-branches
2
+ """
3
+ Module for monitoring OpenAI API calls.
4
+ """
5
+
6
+ import logging
7
+ from opentelemetry.trace import SpanKind, Status, StatusCode
8
+ from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
9
+ from openlit.__helpers import get_chat_model_cost, get_embed_model_cost, get_audio_model_cost
10
+ from openlit.__helpers import get_image_model_cost, openai_tokens, handle_exception
11
+ from openlit.semcov import SemanticConvetion
12
+
13
+ # Initialize logger for logging potential issues and operations
14
+ logger = logging.getLogger(__name__)
15
+
16
def _format_messages(messages):
    """
    Flatten an OpenAI 'messages' payload into a single prompt string.

    Each message contributes a "<role>: <content>" line. Multi-modal
    content (a list of typed parts) is joined as "type: value" pairs.

    Args:
        messages: The 'messages' argument passed to the OpenAI client
                  (iterable of dicts with 'role' and 'content' keys).

    Returns:
        All messages joined with newlines.
    """
    formatted_messages = []
    for message in messages:
        role = message["role"]
        content = message["content"]

        if isinstance(content, list):
            # pylint: disable=line-too-long
            content_str = ", ".join(
                f'{item["type"]}: {item["text"] if "text" in item else item["image_url"]}'
                if "type" in item else f'text: {item["text"]}'
                for item in content
            )
            formatted_messages.append(f"{role}: {content_str}")
        else:
            formatted_messages.append(f"{role}: {content}")
    return "\n".join(formatted_messages)

def chat_completions(gen_ai_endpoint, version, environment, application_name,
                     tracer, pricing_info, trace_content, metrics, disable_metrics):
    """
    Generates a telemetry wrapper for chat completions to collect metrics.

    Args:
        gen_ai_endpoint: Endpoint identifier for logging and tracing.
        version: Version of the monitoring package.
        environment: Deployment environment (e.g., production, staging).
        application_name: Name of the application using the OpenAI API.
        tracer: OpenTelemetry tracer for creating spans.
        pricing_info: Information used for calculating the cost of OpenAI usage.
        trace_content: Flag indicating whether to trace the actual content.
        metrics: Dict of OpenTelemetry metric instruments.
        disable_metrics: When True, skip metric recording entirely.

    Returns:
        A function that wraps the chat completions method to add telemetry.
    """

    def wrapper(wrapped, instance, args, kwargs):
        """
        Wraps the 'chat.completions' API call to add telemetry.

        This collects metrics such as execution time, cost, and token usage, and handles errors
        gracefully, adding details to the trace for observability.

        Args:
            wrapped: The original 'chat.completions' method to be wrapped.
            instance: The instance of the class where the original method is defined.
            args: Positional arguments for the 'chat.completions' method.
            kwargs: Keyword arguments for the 'chat.completions' method.

        Returns:
            The response from the original 'chat.completions' method.
        """

        # Check if streaming is enabled for the API call
        streaming = kwargs.get("stream", False)

        # pylint: disable=no-else-return
        if streaming:
            # Special handling for streaming response to accommodate the nature of data flow
            def stream_generator():
                with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
                    # Placeholder for aggregating streaming response
                    llmresponse = ""
                    # Fix: default the response id so the telemetry block below
                    # cannot hit UnboundLocalError when the stream yields no chunks.
                    response_id = ""

                    # Loop through streaming events capturing relevant details
                    for chunk in wrapped(*args, **kwargs):
                        # Collect message IDs and aggregated response from events
                        if len(chunk.choices) > 0:
                            # pylint: disable=line-too-long
                            if hasattr(chunk.choices[0], "delta") and hasattr(chunk.choices[0].delta, "content"):
                                content = chunk.choices[0].delta.content
                                if content:
                                    llmresponse += content
                        yield chunk
                        response_id = chunk.id

                    # Handling exception ensure observability without disrupting operation
                    try:
                        # Format 'messages' into a single string
                        prompt = _format_messages(kwargs.get("messages", ""))

                        # Calculate tokens using input prompt and aggregated response
                        prompt_tokens = openai_tokens(prompt,
                                                      kwargs.get("model", "gpt-3.5-turbo"))
                        completion_tokens = openai_tokens(llmresponse,
                                                          kwargs.get("model", "gpt-3.5-turbo"))

                        # Calculate cost of the operation
                        cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
                                                   pricing_info, prompt_tokens,
                                                   completion_tokens)

                        # Set Span attributes
                        span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
                        span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                           SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
                        span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
                                           SemanticConvetion.GEN_AI_TYPE_CHAT)
                        span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
                                           gen_ai_endpoint)
                        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                           response_id)
                        span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
                                           environment)
                        span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
                                           application_name)
                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                                           kwargs.get("model", "gpt-3.5-turbo"))
                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                           kwargs.get("user", ""))
                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
                                           kwargs.get("top_p", 1))
                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
                                           kwargs.get("max_tokens", ""))
                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
                                           kwargs.get("temperature", 1))
                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
                                           kwargs.get("presence_penalty", 0))
                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
                                           kwargs.get("frequency_penalty", 0))
                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
                                           kwargs.get("seed", ""))
                        span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                           True)
                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
                                           prompt_tokens)
                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
                                           completion_tokens)
                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
                                           prompt_tokens + completion_tokens)
                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                           cost)
                        if trace_content:
                            span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
                                               prompt)
                            span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
                                               llmresponse)

                        span.set_status(Status(StatusCode.OK))

                        if disable_metrics is False:
                            attributes = {
                                TELEMETRY_SDK_NAME:
                                    "openlit",
                                SemanticConvetion.GEN_AI_APPLICATION_NAME:
                                    application_name,
                                SemanticConvetion.GEN_AI_SYSTEM:
                                    SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
                                SemanticConvetion.GEN_AI_ENVIRONMENT:
                                    environment,
                                SemanticConvetion.GEN_AI_TYPE:
                                    SemanticConvetion.GEN_AI_TYPE_CHAT,
                                SemanticConvetion.GEN_AI_REQUEST_MODEL:
                                    kwargs.get("model", "gpt-3.5-turbo")
                            }

                            metrics["genai_requests"].add(1, attributes)
                            metrics["genai_total_tokens"].add(
                                prompt_tokens + completion_tokens, attributes
                            )
                            metrics["genai_completion_tokens"].add(completion_tokens, attributes)
                            metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
                            metrics["genai_cost"].record(cost, attributes)

                    except Exception as e:
                        handle_exception(span, e)
                        logger.error("Error in trace creation: %s", e)

            return stream_generator()

        # Handling for non-streaming responses
        else:
            # pylint: disable=line-too-long
            with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
                response = wrapped(*args, **kwargs)

                try:
                    # Format 'messages' into a single string
                    prompt = _format_messages(kwargs.get("messages", ""))

                    # Set base span attribues
                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                       SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
                                       SemanticConvetion.GEN_AI_TYPE_CHAT)
                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
                                       gen_ai_endpoint)
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                       response.id)
                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
                                       environment)
                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
                                       application_name)
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                                       kwargs.get("model", "gpt-3.5-turbo"))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TOP_P,
                                       kwargs.get("top_p", 1))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MAX_TOKENS,
                                       kwargs.get("max_tokens", ""))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                       kwargs.get("user", ""))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TEMPERATURE,
                                       kwargs.get("temperature", 1))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_PRESENCE_PENALTY,
                                       kwargs.get("presence_penalty", 0))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FREQUENCY_PENALTY,
                                       kwargs.get("frequency_penalty", 0))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_SEED,
                                       kwargs.get("seed", ""))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
                                       False)
                    if trace_content:
                        span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
                                           prompt)

                    # Set span attributes when tools is not passed to the function call
                    if "tools" not in kwargs:
                        # Calculate cost of the operation
                        cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
                                                   pricing_info, response.usage.prompt_tokens,
                                                   response.usage.completion_tokens)

                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
                                           response.usage.prompt_tokens)
                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
                                           response.usage.completion_tokens)
                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
                                           response.usage.total_tokens)
                        span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_FINISH_REASON,
                                           response.choices[0].finish_reason)
                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                           cost)

                        # Set span attributes for when n = 1 (default)
                        if "n" not in kwargs or kwargs["n"] == 1:
                            if trace_content:
                                span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
                                                   response.choices[0].message.content)

                        # Set span attributes for when n > 0
                        else:
                            i = 0
                            while i < kwargs["n"] and trace_content is True:
                                attribute_name = f"gen_ai.content.completion.{i}"
                                span.set_attribute(attribute_name,
                                                   response.choices[i].message.content)
                                i += 1

                            # Return original response
                            return response

                    # Set span attributes when tools is passed to the function call
                    elif "tools" in kwargs:
                        # Calculate cost of the operation
                        cost = get_chat_model_cost(kwargs.get("model", "gpt-3.5-turbo"),
                                                   pricing_info, response.usage.prompt_tokens,
                                                   response.usage.completion_tokens)

                        span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
                                           "Function called with tools")
                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
                                           response.usage.prompt_tokens)
                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
                                           response.usage.completion_tokens)
                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
                                           response.usage.total_tokens)
                        span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                           cost)

                    span.set_status(Status(StatusCode.OK))

                    if disable_metrics is False:
                        attributes = {
                            TELEMETRY_SDK_NAME:
                                "openlit",
                            SemanticConvetion.GEN_AI_APPLICATION_NAME:
                                application_name,
                            SemanticConvetion.GEN_AI_SYSTEM:
                                SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
                            SemanticConvetion.GEN_AI_ENVIRONMENT:
                                environment,
                            SemanticConvetion.GEN_AI_TYPE:
                                SemanticConvetion.GEN_AI_TYPE_CHAT,
                            SemanticConvetion.GEN_AI_REQUEST_MODEL:
                                kwargs.get("model", "gpt-3.5-turbo")
                        }

                        metrics["genai_requests"].add(1, attributes)
                        metrics["genai_total_tokens"].add(response.usage.total_tokens, attributes)
                        metrics["genai_completion_tokens"].add(response.usage.completion_tokens, attributes)
                        metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, attributes)
                        metrics["genai_cost"].record(cost, attributes)

                    # Return original response
                    return response

                except Exception as e:
                    handle_exception(span, e)
                    logger.error("Error in trace creation: %s", e)

                    # Return original response
                    return response

    return wrapper
336
+
337
def embedding(gen_ai_endpoint, version, environment, application_name,
              tracer, pricing_info, trace_content, metrics, disable_metrics):
    """
    Generates a telemetry wrapper for embeddings to collect metrics.

    Args:
        gen_ai_endpoint: Endpoint identifier for logging and tracing.
        version: Version of the monitoring package.
        environment: Deployment environment (e.g., production, staging).
        application_name: Name of the application using the OpenAI API.
        tracer: OpenTelemetry tracer for creating spans.
        pricing_info: Information used for calculating the cost of OpenAI usage.
        trace_content: Flag indicating whether to trace the actual content.

    Returns:
        A function that wraps the embeddings method to add telemetry.
    """

    def wrapper(wrapped, instance, args, kwargs):
        """
        Wraps the 'embeddings' API call to add telemetry.

        Records cost, token usage and request parameters on a span, and
        handles errors gracefully so instrumentation never breaks the call.

        Args:
            wrapped: The original 'embeddings' method to be wrapped.
            instance: The instance of the class where the original method is defined.
            args: Positional arguments for the 'embeddings' method.
            kwargs: Keyword arguments for the 'embeddings' method.

        Returns:
            The response from the original 'embeddings' method.
        """

        with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
            response = wrapped(*args, **kwargs)

            try:
                request_model = kwargs.get("model", "text-embedding-ada-002")

                # Cost is derived from the prompt token count reported by the API
                cost = get_embed_model_cost(request_model, pricing_info,
                                            response.usage.prompt_tokens)

                # Record every span attribute in one pass (insertion order
                # matches the original attribute ordering).
                span_attributes = {
                    TELEMETRY_SDK_NAME: "openlit",
                    SemanticConvetion.GEN_AI_SYSTEM:
                        SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
                    SemanticConvetion.GEN_AI_TYPE:
                        SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
                    SemanticConvetion.GEN_AI_ENDPOINT: gen_ai_endpoint,
                    SemanticConvetion.GEN_AI_ENVIRONMENT: environment,
                    SemanticConvetion.GEN_AI_APPLICATION_NAME: application_name,
                    SemanticConvetion.GEN_AI_REQUEST_MODEL: request_model,
                    SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_FORMAT:
                        kwargs.get("encoding_format", "float"),
                    SemanticConvetion.GEN_AI_REQUEST_EMBEDDING_DIMENSION:
                        kwargs.get("dimensions", ""),
                    SemanticConvetion.GEN_AI_REQUEST_USER: kwargs.get("user", ""),
                    SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS:
                        response.usage.prompt_tokens,
                    SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS:
                        response.usage.total_tokens,
                    SemanticConvetion.GEN_AI_USAGE_COST: cost,
                }
                for attr_key, attr_value in span_attributes.items():
                    span.set_attribute(attr_key, attr_value)

                if trace_content:
                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
                                       kwargs.get("input", ""))

                span.set_status(Status(StatusCode.OK))

                if disable_metrics is False:
                    metric_attributes = {
                        TELEMETRY_SDK_NAME: "openlit",
                        SemanticConvetion.GEN_AI_APPLICATION_NAME: application_name,
                        SemanticConvetion.GEN_AI_SYSTEM:
                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
                        SemanticConvetion.GEN_AI_ENVIRONMENT: environment,
                        SemanticConvetion.GEN_AI_TYPE:
                            SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
                        SemanticConvetion.GEN_AI_REQUEST_MODEL: request_model,
                    }

                    metrics["genai_requests"].add(1, metric_attributes)
                    metrics["genai_total_tokens"].add(response.usage.total_tokens, metric_attributes)
                    metrics["genai_prompt_tokens"].add(response.usage.prompt_tokens, metric_attributes)
                    metrics["genai_cost"].record(cost, metric_attributes)

                # Return original response
                return response

            except Exception as e:
                handle_exception(span, e)
                logger.error("Error in trace creation: %s", e)

                # Return original response
                return response

    return wrapper
444
+
445
def finetune(gen_ai_endpoint, version, environment, application_name,
             tracer, pricing_info, trace_content, metrics, disable_metrics):
    """
    Generates a telemetry wrapper for fine-tuning jobs to collect metrics.

    Args:
        gen_ai_endpoint: Endpoint identifier for logging and tracing.
        version: Version of the monitoring package.
        environment: Deployment environment (e.g., production, staging).
        application_name: Name of the application using the OpenAI API.
        tracer: OpenTelemetry tracer for creating spans.
        pricing_info: Information used for calculating the cost of OpenAI usage.
        trace_content: Flag indicating whether to trace the actual content.

    Returns:
        A function that wraps the fine tuning creation method to add telemetry.
    """

    def wrapper(wrapped, instance, args, kwargs):
        """
        Wraps the 'fine_tuning.jobs.create' API call to add telemetry.

        This collects metrics such as execution time, usage stats, and handles errors
        gracefully, adding details to the trace for observability.

        Args:
            wrapped: The original 'fine_tuning.jobs.create' method to be wrapped.
            instance: The instance of the class where the original method is defined.
            args: Positional arguments for the method.
            kwargs: Keyword arguments for the method.

        Returns:
            The response from the original 'fine_tuning.jobs.create' method.
        """

        with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
            response = wrapped(*args, **kwargs)

            # Handling exception ensure observability without disrupting operation
            try:
                # Fix: hyperparameters is a nested mapping in the request
                # payload; kwargs.get("hyperparameters.batch_size") looked up
                # a literal dotted key that can never exist, so the recorded
                # values were always the defaults.
                # NOTE(review): assumes callers pass hyperparameters as a
                # dict per the OpenAI fine-tuning API — confirm.
                hyperparameters = kwargs.get("hyperparameters") or {}

                # Set Span attributes
                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                   SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
                                   SemanticConvetion.GEN_AI_TYPE_FINETUNING)
                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
                                   gen_ai_endpoint)
                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
                                   environment)
                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
                                   application_name)
                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                                   kwargs.get("model", "gpt-3.5-turbo"))
                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_TRAINING_FILE,
                                   kwargs.get("training_file", ""))
                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_VALIDATION_FILE,
                                   kwargs.get("validation_file", ""))
                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_BATCH_SIZE,
                                   hyperparameters.get("batch_size", "auto"))
                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_LRM,
                                   hyperparameters.get("learning_rate_multiplier",
                                                       "auto"))
                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_EPOCHS,
                                   hyperparameters.get("n_epochs", "auto"))
                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_MODEL_SUFFIX,
                                   kwargs.get("suffix", ""))
                span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                   response.id)
                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
                                   response.usage.prompt_tokens)
                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_FINETUNE_STATUS,
                                   response.status)
                span.set_status(Status(StatusCode.OK))

                if disable_metrics is False:
                    attributes = {
                        TELEMETRY_SDK_NAME:
                            "openlit",
                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
                            application_name,
                        SemanticConvetion.GEN_AI_SYSTEM:
                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
                        SemanticConvetion.GEN_AI_ENVIRONMENT:
                            environment,
                        SemanticConvetion.GEN_AI_TYPE:
                            SemanticConvetion.GEN_AI_TYPE_FINETUNING,
                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
                            kwargs.get("model", "gpt-3.5-turbo")
                    }

                    metrics["genai_requests"].add(1, attributes)

                # Return original response
                return response

            except Exception as e:
                handle_exception(span, e)
                logger.error("Error in trace creation: %s", e)

                # Return original response
                return response

    return wrapper
549
+
550
def image_generate(gen_ai_endpoint, version, environment, application_name,
                   tracer, pricing_info, trace_content, metrics, disable_metrics):
    """
    Generates a telemetry wrapper for image generation to collect metrics.

    Args:
        gen_ai_endpoint: Endpoint identifier for logging and tracing.
        version: Version of the monitoring package.
        environment: Deployment environment (e.g., production, staging).
        application_name: Name of the application using the OpenAI API.
        tracer: OpenTelemetry tracer for creating spans.
        pricing_info: Information used for calculating the cost of OpenAI image generation.
        trace_content: Flag indicating whether to trace the input prompt and generated images.

    Returns:
        A function that wraps the image generation method to add telemetry.
    """

    def wrapper(wrapped, instance, args, kwargs):
        """
        Wraps the 'images.generate' API call to add telemetry.

        This collects metrics such as execution time, cost, and handles errors
        gracefully, adding details to the trace for observability.

        Args:
            wrapped: The original 'images.generate' method to be wrapped.
            instance: The instance of the class where the original method is defined.
            args: Positional arguments for the 'images.generate' method.
            kwargs: Keyword arguments for the 'images.generate' method.

        Returns:
            The response from the original 'images.generate' method.
        """

        with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
            response = wrapped(*args, **kwargs)
            images_count = 0

            try:
                # Find Image format
                if "response_format" in kwargs and kwargs["response_format"] == "b64_json":
                    image = "b64_json"
                else:
                    image = "url"

                # Calculate cost of the operation (per generated image)
                cost = get_image_model_cost(kwargs.get("model", "dall-e-2"),
                                            pricing_info, kwargs.get("size", "1024x1024"),
                                            kwargs.get("quality", "standard"))

                for items in response.data:
                    # Set Span attributes
                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                       SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
                                       SemanticConvetion.GEN_AI_TYPE_IMAGE)
                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
                                       gen_ai_endpoint)
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                       response.created)
                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
                                       environment)
                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
                                       application_name)
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                                       kwargs.get("model", "dall-e-2"))
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_IMAGE_SIZE,
                                       kwargs.get("size", "1024x1024"))
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_IMAGE_QUALITY,
                                       kwargs.get("quality", "standard"))
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_IMAGE_STYLE,
                                       kwargs.get("style", "vivid"))
                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_REVISED_PROMPT,
                                       items.revised_prompt if items.revised_prompt else "")
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                       kwargs.get("user", ""))
                    if trace_content:
                        span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
                                           kwargs.get("prompt", ""))

                        # One attribute per returned image
                        attribute_name = f"gen_ai.response.image.{images_count}"
                        span.set_attribute(attribute_name,
                                           getattr(items, image))

                    images_count += 1

                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                   len(response.data) * cost)
                span.set_status(Status(StatusCode.OK))

                if disable_metrics is False:
                    attributes = {
                        TELEMETRY_SDK_NAME:
                            "openlit",
                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
                            application_name,
                        SemanticConvetion.GEN_AI_SYSTEM:
                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
                        SemanticConvetion.GEN_AI_ENVIRONMENT:
                            environment,
                        SemanticConvetion.GEN_AI_TYPE:
                            SemanticConvetion.GEN_AI_TYPE_IMAGE,
                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
                            kwargs.get("model", "dall-e-2")
                    }

                    metrics["genai_requests"].add(1, attributes)
                    # Fix: record the total cost of the request (per-image cost
                    # times image count) so the metric agrees with the
                    # GEN_AI_USAGE_COST span attribute above.
                    metrics["genai_cost"].record(len(response.data) * cost, attributes)

                # Return original response
                return response

            except Exception as e:
                handle_exception(span, e)
                logger.error("Error in trace creation: %s", e)

                # Return original response
                return response

    return wrapper
672
+
673
# NOTE(review): name is misspelled ("variatons") but kept as-is — callers
# elsewhere reference it by this name.
def image_variatons(gen_ai_endpoint, version, environment, application_name,
                    tracer, pricing_info, trace_content, metrics, disable_metrics):
    """
    Generates a telemetry wrapper for creating image variations to collect metrics.

    Args:
        gen_ai_endpoint: Endpoint identifier for logging and tracing.
        version: Version of the monitoring package.
        environment: Deployment environment (e.g., production, staging).
        application_name: Name of the application using the OpenAI API.
        tracer: OpenTelemetry tracer for creating spans.
        pricing_info: Information used for calculating the cost of generating image variations.
        trace_content: Flag indicating whether to trace the input image and generated variations.

    Returns:
        A function that wraps the image variations creation method to add telemetry.
    """

    def wrapper(wrapped, instance, args, kwargs):
        """
        Wraps the 'images.create.variations' API call to add telemetry.

        This collects metrics such as execution time, cost, and handles errors
        gracefully, adding details to the trace for observability.

        Args:
            wrapped: The original 'images.create.variations' method to be wrapped.
            instance: The instance of the class where the original method is defined.
            args: Positional arguments for the method.
            kwargs: Keyword arguments for the method.

        Returns:
            The response from the original 'images.create.variations' method.
        """

        with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
            response = wrapped(*args, **kwargs)
            images_count = 0

            try:
                # Find Image format
                if "response_format" in kwargs and kwargs["response_format"] == "b64_json":
                    image = "b64_json"
                else:
                    image = "url"

                # Calculate cost of the operation (variations are billed at
                # "standard" quality; per-image cost)
                cost = get_image_model_cost(kwargs.get("model", "dall-e-2"), pricing_info,
                                            kwargs.get("size", "1024x1024"), "standard")

                for items in response.data:
                    # Set Span attributes
                    span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
                    span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
                                       SemanticConvetion.GEN_AI_SYSTEM_OPENAI)
                    span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
                                       SemanticConvetion.GEN_AI_TYPE_IMAGE)
                    span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
                                       gen_ai_endpoint)
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_ID,
                                       response.created)
                    span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
                                       environment)
                    span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
                                       application_name)
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
                                       kwargs.get("model", "dall-e-2"))
                    span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_USER,
                                       kwargs.get("user", ""))
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_IMAGE_SIZE,
                                       kwargs.get("size", "1024x1024"))
                    span.set_attribute(SemanticConvetion.GEN_AI_RESPONSE_IMAGE_QUALITY,
                                       "standard")
                    if trace_content:
                        # NOTE(review): the prompt here is the input image kwarg;
                        # the constant is used as the kwargs key — confirm it
                        # resolves to "image".
                        span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
                                           kwargs.get(SemanticConvetion.GEN_AI_TYPE_IMAGE, ""))

                        # One attribute per returned variation
                        attribute_name = f"gen_ai.response.image.{images_count}"
                        span.set_attribute(attribute_name,
                                           getattr(items, image))

                    images_count += 1

                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                   len(response.data) * cost)
                span.set_status(Status(StatusCode.OK))

                if disable_metrics is False:
                    attributes = {
                        TELEMETRY_SDK_NAME:
                            "openlit",
                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
                            application_name,
                        SemanticConvetion.GEN_AI_SYSTEM:
                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
                        SemanticConvetion.GEN_AI_ENVIRONMENT:
                            environment,
                        SemanticConvetion.GEN_AI_TYPE:
                            SemanticConvetion.GEN_AI_TYPE_IMAGE,
                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
                            kwargs.get("model", "dall-e-2")
                    }

                    metrics["genai_requests"].add(1, attributes)
                    # Fix: record the total request cost (per-image cost times
                    # image count) so the metric matches the GEN_AI_USAGE_COST
                    # span attribute above.
                    metrics["genai_cost"].record(len(response.data) * cost, attributes)

                # Return original response
                return response

            except Exception as e:
                handle_exception(span, e)
                logger.error("Error in trace creation: %s", e)

                # Return original response
                return response

    return wrapper
790
+
791
def audio_create(gen_ai_endpoint, version, environment, application_name,
                 tracer, pricing_info, trace_content, metrics, disable_metrics):
    """
    Generates a telemetry wrapper for creating speech audio to collect metrics.

    Args:
        gen_ai_endpoint: Endpoint identifier for logging and tracing.
        version: Version of the monitoring package.
        environment: Deployment environment (e.g., production, staging).
        application_name: Name of the application using the OpenAI API.
        tracer: OpenTelemetry tracer for creating spans.
        pricing_info: Information used for calculating the cost of generating speech audio.
        trace_content: Flag indicating whether to trace the input text and generated audio.

    Returns:
        A function that wraps the speech audio creation method to add telemetry.
    """

    def wrapper(wrapped, instance, args, kwargs):
        """
        Wraps the 'audio.speech.create' API call to add telemetry.

        Records cost and request parameters on a span, and handles errors
        gracefully so instrumentation never breaks the call.

        Args:
            wrapped: The original 'audio.speech.create' method to be wrapped.
            instance: The instance of the class where the original method is defined.
            args: Positional arguments for the 'audio.speech.create' method.
            kwargs: Keyword arguments for the 'audio.speech.create' method.

        Returns:
            The response from the original 'audio.speech.create' method.
        """

        with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
            response = wrapped(*args, **kwargs)

            try:
                request_model = kwargs.get("model", "tts-1")

                # Cost depends on the model and the length of the input text
                cost = get_audio_model_cost(request_model,
                                            pricing_info, kwargs.get("input", ""))

                # Record every span attribute in one pass (insertion order
                # matches the original attribute ordering).
                span_attributes = {
                    TELEMETRY_SDK_NAME: "openlit",
                    SemanticConvetion.GEN_AI_SYSTEM:
                        SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
                    SemanticConvetion.GEN_AI_TYPE:
                        SemanticConvetion.GEN_AI_TYPE_AUDIO,
                    SemanticConvetion.GEN_AI_ENDPOINT: gen_ai_endpoint,
                    SemanticConvetion.GEN_AI_ENVIRONMENT: environment,
                    SemanticConvetion.GEN_AI_APPLICATION_NAME: application_name,
                    SemanticConvetion.GEN_AI_REQUEST_MODEL: request_model,
                    SemanticConvetion.GEN_AI_REQUEST_AUDIO_VOICE:
                        kwargs.get("voice", "alloy"),
                    SemanticConvetion.GEN_AI_REQUEST_AUDIO_RESPONSE_FORMAT:
                        kwargs.get("response_format", "mp3"),
                    SemanticConvetion.GEN_AI_REQUEST_AUDIO_SPEED:
                        kwargs.get("speed", 1),
                    SemanticConvetion.GEN_AI_USAGE_COST: cost,
                }
                for attr_key, attr_value in span_attributes.items():
                    span.set_attribute(attr_key, attr_value)

                if trace_content:
                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
                                       kwargs.get("input", ""))

                span.set_status(Status(StatusCode.OK))

                if disable_metrics is False:
                    metric_attributes = {
                        TELEMETRY_SDK_NAME: "openlit",
                        SemanticConvetion.GEN_AI_APPLICATION_NAME: application_name,
                        SemanticConvetion.GEN_AI_SYSTEM:
                            SemanticConvetion.GEN_AI_SYSTEM_OPENAI,
                        SemanticConvetion.GEN_AI_ENVIRONMENT: environment,
                        SemanticConvetion.GEN_AI_TYPE:
                            SemanticConvetion.GEN_AI_TYPE_AUDIO,
                        SemanticConvetion.GEN_AI_REQUEST_MODEL: request_model,
                    }

                    metrics["genai_requests"].add(1, metric_attributes)
                    metrics["genai_cost"].record(cost, metric_attributes)

                # Return original response
                return response

            except Exception as e:
                handle_exception(span, e)
                logger.error("Error in trace creation: %s", e)

                # Return original response
                return response

    return wrapper