openlit 1.1.3__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1047 @@
+ # pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, protected-access, too-many-lines
+ """
+ Module for monitoring VertexAI API calls.
+ """
+
+ import logging
+ import math
+ from opentelemetry.trace import SpanKind, Status, StatusCode
+ from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+ from openlit.__helpers import get_chat_model_cost, get_embed_model_cost, handle_exception
+ from openlit.semcov import SemanticConvetion
+
+ # Initialize logger for logging potential issues and operations
+ logger = logging.getLogger(__name__)
+
+ def generate_content_async(gen_ai_endpoint, version, environment, application_name, tracer,
+                            pricing_info, trace_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for messages to collect metrics.
+
+     Args:
+         gen_ai_endpoint: Endpoint identifier for logging and tracing.
+         version: Version of the monitoring package.
+         environment: Deployment environment (e.g., production, staging).
+         application_name: Name of the application using the VertexAI API.
+         tracer: OpenTelemetry tracer for creating spans.
+         pricing_info: Information used for calculating the cost of VertexAI usage.
+         trace_content: Flag indicating whether to trace the actual content.
+
+     Returns:
+         A function that wraps the chat method to add telemetry.
+     """
+
+     async def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the 'generate_content' API call to add telemetry.
+
+         This collects metrics such as execution time, cost, and token usage, and handles
+         errors gracefully, adding details to the trace for observability.
+
+         Args:
+             wrapped: The original 'generate_content' method to be wrapped.
+             instance: The instance of the class where the original method is defined.
+             args: Positional arguments for the 'generate_content' method.
+             kwargs: Keyword arguments for the 'generate_content' method.
+
+         Returns:
+             The response from the original 'generate_content' method.
+         """
+
+         # Check if streaming is enabled for the API call
+         streaming = kwargs.get("stream", False)
+
+         # pylint: disable=no-else-return
+         if streaming:
+             # Special handling for streaming responses to accommodate the nature of data flow
+             async def stream_generator():
+                 with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+                     # Aggregate the streaming response; initialize the token counters
+                     # so an empty stream cannot leave them unbound
+                     llmresponse = ""
+                     prompt_tokens = completion_tokens = total_tokens = 0
+
+                     # Loop through streaming events, capturing relevant details
+                     async for event in await wrapped(*args, **kwargs):
+                         llmresponse += str(event.text)
+                         prompt_tokens = event.usage_metadata.prompt_token_count
+                         completion_tokens = event.usage_metadata.candidates_token_count
+                         total_tokens = event.usage_metadata.total_token_count
+                         yield event
+
+                     # Handle exceptions to ensure observability without disrupting the operation
+                     try:
+                         prompt = str(args[0][0])
+
+                         model = "/".join(instance._model_name.split("/")[3:])
+
+                         # Calculate cost of the operation
+                         cost = get_chat_model_cost(model,
+                                                    pricing_info, prompt_tokens,
+                                                    completion_tokens)
+
+                         # Set Span attributes
+                         span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                         span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                            SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI)
+                         span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                            SemanticConvetion.GEN_AI_TYPE_CHAT)
+                         span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                            gen_ai_endpoint)
+                         span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                            environment)
+                         span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                            application_name)
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                            model)
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                            True)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                            prompt_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                            completion_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                            total_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                            cost)
+                         if trace_content:
+                             span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                                prompt)
+                             span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                                llmresponse)
+
+                         span.set_status(Status(StatusCode.OK))
+
+                         if disable_metrics is False:
+                             attributes = {
+                                 TELEMETRY_SDK_NAME:
+                                     "openlit",
+                                 SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                     application_name,
+                                 SemanticConvetion.GEN_AI_SYSTEM:
+                                     SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
+                                 SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                     environment,
+                                 SemanticConvetion.GEN_AI_TYPE:
+                                     SemanticConvetion.GEN_AI_TYPE_CHAT,
+                                 SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                     model
+                             }
+
+                             metrics["genai_requests"].add(1, attributes)
+                             metrics["genai_total_tokens"].add(total_tokens, attributes)
+                             metrics["genai_completion_tokens"].add(completion_tokens, attributes)
+                             metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                             metrics["genai_cost"].record(cost, attributes)
+
+                     except Exception as e:
+                         handle_exception(span, e)
+                         logger.error("Error in trace creation: %s", e)
+
+             return stream_generator()
+
+         # Handling for non-streaming responses
+         else:
+             with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+                 response = await wrapped(*args, **kwargs)
+
+                 try:
+                     # Format 'messages' into a single string
+                     prompt = str(args[0][0])
+
+                     model = "/".join(instance._model_name.split("/")[3:])
+
+                     # Calculate cost of the operation
+                     cost = get_chat_model_cost(model,
+                                                pricing_info,
+                                                response.usage_metadata.prompt_token_count,
+                                                response.usage_metadata.candidates_token_count)
+
+                     # Set Span attributes
+                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                        SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI)
+                     span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                        SemanticConvetion.GEN_AI_TYPE_CHAT)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                        gen_ai_endpoint)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                        environment)
+                     span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                        application_name)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                        model)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                        False)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                        response.usage_metadata.prompt_token_count)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                        response.usage_metadata.candidates_token_count)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                        response.usage_metadata.total_token_count)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                        cost)
+                     if trace_content:
+                         span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                            prompt)
+                         span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                            response.candidates[0].content.parts[0].text)
+
+                     span.set_status(Status(StatusCode.OK))
+
+                     if disable_metrics is False:
+                         attributes = {
+                             TELEMETRY_SDK_NAME:
+                                 "openlit",
+                             SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                 application_name,
+                             SemanticConvetion.GEN_AI_SYSTEM:
+                                 SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
+                             SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                 environment,
+                             SemanticConvetion.GEN_AI_TYPE:
+                                 SemanticConvetion.GEN_AI_TYPE_CHAT,
+                             SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                 model
+                         }
+
+                         metrics["genai_requests"].add(1, attributes)
+                         metrics["genai_total_tokens"].add(
+                             response.usage_metadata.total_token_count, attributes)
+                         metrics["genai_completion_tokens"].add(
+                             response.usage_metadata.candidates_token_count, attributes)
+                         metrics["genai_prompt_tokens"].add(
+                             response.usage_metadata.prompt_token_count, attributes)
+                         metrics["genai_cost"].record(cost, attributes)
+
+                     # Return original response
+                     return response
+
+                 except Exception as e:
+                     handle_exception(span, e)
+                     logger.error("Error in trace creation: %s", e)
+
+                     # Return original response
+                     return response
+
+     return wrapper
+
+
+ def send_message_async(gen_ai_endpoint, version, environment, application_name, tracer,
+                        pricing_info, trace_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for messages to collect metrics.
+
+     Args:
+         gen_ai_endpoint: Endpoint identifier for logging and tracing.
+         version: Version of the monitoring package.
+         environment: Deployment environment (e.g., production, staging).
+         application_name: Name of the application using the VertexAI API.
+         tracer: OpenTelemetry tracer for creating spans.
+         pricing_info: Information used for calculating the cost of VertexAI usage.
+         trace_content: Flag indicating whether to trace the actual content.
+
+     Returns:
+         A function that wraps the chat method to add telemetry.
+     """
+
+     async def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the 'send_message' API call to add telemetry.
+
+         This collects metrics such as execution time, cost, and token usage, and handles
+         errors gracefully, adding details to the trace for observability.
+
+         Args:
+             wrapped: The original 'send_message' method to be wrapped.
+             instance: The instance of the class where the original method is defined.
+             args: Positional arguments for the 'send_message' method.
+             kwargs: Keyword arguments for the 'send_message' method.
+
+         Returns:
+             The response from the original 'send_message' method.
+         """
+
+         # Check if streaming is enabled for the API call
+         streaming = kwargs.get("stream", False)
+
+         # pylint: disable=no-else-return
+         if streaming:
+             # Special handling for streaming responses to accommodate the nature of data flow
+             async def stream_generator():
+                 with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+                     # Aggregate the streaming response; initialize the token counters
+                     # so an empty stream cannot leave them unbound
+                     llmresponse = ""
+                     prompt_tokens = completion_tokens = total_tokens = 0
+
+                     # Loop through streaming events, capturing relevant details
+                     async for event in await wrapped(*args, **kwargs):
+                         llmresponse += str(event.text)
+                         prompt_tokens = event.usage_metadata.prompt_token_count
+                         completion_tokens = event.usage_metadata.candidates_token_count
+                         total_tokens = event.usage_metadata.total_token_count
+                         yield event
+
+                     # Handle exceptions to ensure observability without disrupting the operation
+                     try:
+                         prompt = args[0][0]
+
+                         model = "/".join(instance._model._model_name.split("/")[3:])
+
+                         # Calculate cost of the operation
+                         cost = get_chat_model_cost(model,
+                                                    pricing_info, prompt_tokens,
+                                                    completion_tokens)
+
+                         # Set Span attributes
+                         span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                         span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                            SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI)
+                         span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                            SemanticConvetion.GEN_AI_TYPE_CHAT)
+                         span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                            gen_ai_endpoint)
+                         span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                            environment)
+                         span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                            application_name)
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                            model)
+                         span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                            True)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                            prompt_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                            completion_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                            total_tokens)
+                         span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                            cost)
+                         if trace_content:
+                             span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                                prompt)
+                             span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                                llmresponse)
+
+                         span.set_status(Status(StatusCode.OK))
+
+                         if disable_metrics is False:
+                             attributes = {
+                                 TELEMETRY_SDK_NAME:
+                                     "openlit",
+                                 SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                     application_name,
+                                 SemanticConvetion.GEN_AI_SYSTEM:
+                                     SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
+                                 SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                     environment,
+                                 SemanticConvetion.GEN_AI_TYPE:
+                                     SemanticConvetion.GEN_AI_TYPE_CHAT,
+                                 SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                     model
+                             }
+
+                             metrics["genai_requests"].add(1, attributes)
+                             metrics["genai_total_tokens"].add(total_tokens, attributes)
+                             metrics["genai_completion_tokens"].add(completion_tokens, attributes)
+                             metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                             metrics["genai_cost"].record(cost, attributes)
+
+                     except Exception as e:
+                         handle_exception(span, e)
+                         logger.error("Error in trace creation: %s", e)
+
+             return stream_generator()
+
+         # Handling for non-streaming responses
+         else:
+             with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+                 response = await wrapped(*args, **kwargs)
+
+                 try:
+                     # Extract the prompt from the call arguments
+                     prompt = args[0]
+
+                     model = "/".join(instance._model._model_name.split("/")[3:])
+
+                     # Calculate cost of the operation
+                     cost = get_chat_model_cost(model,
+                                                pricing_info,
+                                                response.usage_metadata.prompt_token_count,
+                                                response.usage_metadata.candidates_token_count)
+
+                     # Set Span attributes
+                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                        SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI)
+                     span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                        SemanticConvetion.GEN_AI_TYPE_CHAT)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                        gen_ai_endpoint)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                        environment)
+                     span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                        application_name)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                        model)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                        False)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                        response.usage_metadata.prompt_token_count)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                        response.usage_metadata.candidates_token_count)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                        response.usage_metadata.total_token_count)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                        cost)
+                     if trace_content:
+                         span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                            prompt)
+                         span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                            response.candidates[0].content.parts[0].text)
+
+                     span.set_status(Status(StatusCode.OK))
+
+                     if disable_metrics is False:
+                         attributes = {
+                             TELEMETRY_SDK_NAME:
+                                 "openlit",
+                             SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                 application_name,
+                             SemanticConvetion.GEN_AI_SYSTEM:
+                                 SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
+                             SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                 environment,
+                             SemanticConvetion.GEN_AI_TYPE:
+                                 SemanticConvetion.GEN_AI_TYPE_CHAT,
+                             SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                 model
+                         }
+
+                         metrics["genai_requests"].add(1, attributes)
+                         metrics["genai_total_tokens"].add(
+                             response.usage_metadata.total_token_count, attributes)
+                         metrics["genai_completion_tokens"].add(
+                             response.usage_metadata.candidates_token_count, attributes)
+                         metrics["genai_prompt_tokens"].add(
+                             response.usage_metadata.prompt_token_count, attributes)
+                         metrics["genai_cost"].record(cost, attributes)
+
+                     # Return original response
+                     return response
+
+                 except Exception as e:
+                     handle_exception(span, e)
+                     logger.error("Error in trace creation: %s", e)
+
+                     # Return original response
+                     return response
+
+     return wrapper
+
+ def predict_async(gen_ai_endpoint, version, environment, application_name, tracer,
+                   pricing_info, trace_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for messages to collect metrics.
+
+     Args:
+         gen_ai_endpoint: Endpoint identifier for logging and tracing.
+         version: Version of the monitoring package.
+         environment: Deployment environment (e.g., production, staging).
+         application_name: Name of the application using the VertexAI API.
+         tracer: OpenTelemetry tracer for creating spans.
+         pricing_info: Information used for calculating the cost of VertexAI usage.
+         trace_content: Flag indicating whether to trace the actual content.
+
+     Returns:
+         A function that wraps the text generation method to add telemetry.
+     """
+
+     async def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the 'predict' API call to add telemetry.
+
+         This collects metrics such as execution time, cost, and token usage, and handles
+         errors gracefully, adding details to the trace for observability.
+
+         Args:
+             wrapped: The original 'predict' method to be wrapped.
+             instance: The instance of the class where the original method is defined.
+             args: Positional arguments for the 'predict' method.
+             kwargs: Keyword arguments for the 'predict' method.
+
+         Returns:
+             The response from the original 'predict' method.
+         """
+
+         with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+             response = await wrapped(*args, **kwargs)
+
+             try:
+                 prompt = args[0]
+
+                 model = instance._model_id
+                 # pylint: disable=line-too-long
+                 prompt_tokens = response._prediction_response.metadata["tokenMetadata"]["inputTokenCount"]["totalTokens"]
+                 completion_tokens = response._prediction_response.metadata["tokenMetadata"]["outputTokenCount"]["totalTokens"]
+                 total_tokens = prompt_tokens + completion_tokens
+
+                 # Calculate cost of the operation
+                 cost = get_chat_model_cost(model,
+                                            pricing_info, prompt_tokens,
+                                            completion_tokens)
+
+                 # Set Span attributes
+                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                    SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI)
+                 span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                    SemanticConvetion.GEN_AI_TYPE_CHAT)
+                 span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                    gen_ai_endpoint)
+                 span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                    environment)
+                 span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                    application_name)
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                    model)
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                    False)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                    prompt_tokens)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                    completion_tokens)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                    total_tokens)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                    cost)
+                 if trace_content:
+                     span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                        prompt)
+                     span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                        response.text)
+
+                 span.set_status(Status(StatusCode.OK))
+
+                 if disable_metrics is False:
+                     attributes = {
+                         TELEMETRY_SDK_NAME:
+                             "openlit",
+                         SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                             application_name,
+                         SemanticConvetion.GEN_AI_SYSTEM:
+                             SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
+                         SemanticConvetion.GEN_AI_ENVIRONMENT:
+                             environment,
+                         SemanticConvetion.GEN_AI_TYPE:
+                             SemanticConvetion.GEN_AI_TYPE_CHAT,
+                         SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                             model
+                     }
+
+                     metrics["genai_requests"].add(1, attributes)
+                     metrics["genai_total_tokens"].add(total_tokens, attributes)
+                     metrics["genai_completion_tokens"].add(completion_tokens, attributes)
+                     metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                     metrics["genai_cost"].record(cost, attributes)
+
+                 # Return original response
+                 return response
+
+             except Exception as e:
+                 handle_exception(span, e)
+                 logger.error("Error in trace creation: %s", e)
+
+                 # Return original response
+                 return response
+
+     return wrapper
+
+ def predict_streaming_async(gen_ai_endpoint, version, environment, application_name, tracer,
+                             pricing_info, trace_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for messages to collect metrics.
+
+     Args:
+         gen_ai_endpoint: Endpoint identifier for logging and tracing.
+         version: Version of the monitoring package.
+         environment: Deployment environment (e.g., production, staging).
+         application_name: Name of the application using the VertexAI API.
+         tracer: OpenTelemetry tracer for creating spans.
+         pricing_info: Information used for calculating the cost of VertexAI usage.
+         trace_content: Flag indicating whether to trace the actual content.
+
+     Returns:
+         A function that wraps the text generation method to add telemetry.
+     """
+
+     async def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the 'predict_streaming' API call to add telemetry.
+
+         This collects metrics such as execution time, cost, and token usage, and handles
+         errors gracefully, adding details to the trace for observability.
+
+         Args:
+             wrapped: The original 'predict_streaming' method to be wrapped.
+             instance: The instance of the class where the original method is defined.
+             args: Positional arguments for the 'predict_streaming' method.
+             kwargs: Keyword arguments for the 'predict_streaming' method.
+
+         Returns:
+             The response from the original 'predict_streaming' method.
+         """
+
+         # Special handling for streaming responses to accommodate the nature of data flow
+         async def stream_generator():
+             with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+                 # Placeholder for aggregating the streaming response
+                 llmresponse = ""
+
+                 # Loop through streaming events, capturing relevant details
+                 async for event in wrapped(*args, **kwargs):
+                     llmresponse += str(event)
+                     yield event
+
+                 # Handle exceptions to ensure observability without disrupting the operation
+                 try:
+                     prompt = args[0]
+                     llmresponse = llmresponse.split('TextGenerationResponse',
+                                                     maxsplit=1)[0].rstrip()
+
+                     # Approximate token counts (roughly four characters per token)
+                     prompt_tokens = math.ceil(len(prompt) / 4)
+                     completion_tokens = math.ceil(len(llmresponse) / 4)
+                     total_tokens = prompt_tokens + completion_tokens
+
+                     model = instance._model_id
+
+                     # Calculate cost of the operation
+                     cost = get_chat_model_cost(model,
+                                                pricing_info, prompt_tokens,
+                                                completion_tokens)
+
+                     # Set Span attributes
+                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                        SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI)
+                     span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                        SemanticConvetion.GEN_AI_TYPE_CHAT)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                        gen_ai_endpoint)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                        environment)
+                     span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                        application_name)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                        model)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                        True)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                        prompt_tokens)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                        completion_tokens)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                        total_tokens)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                        cost)
+                     if trace_content:
+                         span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                            prompt)
+                         span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                            llmresponse)
+
+                     span.set_status(Status(StatusCode.OK))
+
+                     if disable_metrics is False:
+                         attributes = {
+                             TELEMETRY_SDK_NAME:
+                                 "openlit",
+                             SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                 application_name,
+                             SemanticConvetion.GEN_AI_SYSTEM:
+                                 SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
+                             SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                 environment,
+                             SemanticConvetion.GEN_AI_TYPE:
+                                 SemanticConvetion.GEN_AI_TYPE_CHAT,
+                             SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                 model
+                         }
+
+                         metrics["genai_requests"].add(1, attributes)
+                         metrics["genai_total_tokens"].add(total_tokens, attributes)
+                         metrics["genai_completion_tokens"].add(completion_tokens, attributes)
+                         metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                         metrics["genai_cost"].record(cost, attributes)
+
+                 except Exception as e:
+                     handle_exception(span, e)
+                     logger.error("Error in trace creation: %s", e)
+
+         return stream_generator()
+
+     return wrapper
+
+ def start_chat_async(gen_ai_endpoint, version, environment, application_name, tracer,
+                      pricing_info, trace_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for messages to collect metrics.
+
+     Args:
+         gen_ai_endpoint: Endpoint identifier for logging and tracing.
+         version: Version of the monitoring package.
+         environment: Deployment environment (e.g., production, staging).
+         application_name: Name of the application using the VertexAI API.
+         tracer: OpenTelemetry tracer for creating spans.
+         pricing_info: Information used for calculating the cost of VertexAI usage.
+         trace_content: Flag indicating whether to trace the actual content.
+
+     Returns:
+         A function that wraps the chat method to add telemetry.
+     """
+
+     async def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the chat 'send_message' API call to add telemetry.
+
+         This collects metrics such as execution time, cost, and token usage, and handles
+         errors gracefully, adding details to the trace for observability.
+
+         Args:
+             wrapped: The original 'send_message' method to be wrapped.
+             instance: The instance of the class where the original method is defined.
+             args: Positional arguments for the 'send_message' method.
+             kwargs: Keyword arguments for the 'send_message' method.
+
+         Returns:
+             The response from the original 'send_message' method.
+         """
+
+         with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+             response = await wrapped(*args, **kwargs)
+
+             try:
+                 prompt = args[0]
+
+                 model = instance._model._model_id
+
+                 # pylint: disable=line-too-long
+                 prompt_tokens = response._prediction_response.metadata["tokenMetadata"]["inputTokenCount"]["totalTokens"]
+                 completion_tokens = response._prediction_response.metadata["tokenMetadata"]["outputTokenCount"]["totalTokens"]
+                 total_tokens = prompt_tokens + completion_tokens
+
+                 # Calculate cost of the operation
+                 cost = get_chat_model_cost(model,
+                                            pricing_info, prompt_tokens,
+                                            completion_tokens)
+
+                 # Set Span attributes
+                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                    SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI)
+                 span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                    SemanticConvetion.GEN_AI_TYPE_CHAT)
+                 span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                    gen_ai_endpoint)
+                 span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                    environment)
+                 span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                    application_name)
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                    model)
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                    False)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                    prompt_tokens)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                    completion_tokens)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                    total_tokens)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                    cost)
+                 if trace_content:
+                     span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                        prompt)
+                     span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                        response.text)
+
+                 span.set_status(Status(StatusCode.OK))
+
+                 if disable_metrics is False:
+                     attributes = {
+                         TELEMETRY_SDK_NAME:
+                             "openlit",
+                         SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                             application_name,
+                         SemanticConvetion.GEN_AI_SYSTEM:
+                             SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
+                         SemanticConvetion.GEN_AI_ENVIRONMENT:
+                             environment,
+                         SemanticConvetion.GEN_AI_TYPE:
+                             SemanticConvetion.GEN_AI_TYPE_CHAT,
+                         SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                             model
+                     }
+
+                     metrics["genai_requests"].add(1, attributes)
+                     metrics["genai_total_tokens"].add(total_tokens, attributes)
+                     metrics["genai_completion_tokens"].add(completion_tokens, attributes)
+                     metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                     metrics["genai_cost"].record(cost, attributes)
+
+                 # Return original response
+                 return response
+
+             except Exception as e:
+                 handle_exception(span, e)
+                 logger.error("Error in trace creation: %s", e)
+
+                 # Return original response
+                 return response
+
+     return wrapper
+
+ def start_chat_streaming_async(gen_ai_endpoint, version, environment, application_name, tracer,
+                                pricing_info, trace_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for messages to collect metrics.
+
+     Args:
+         gen_ai_endpoint: Endpoint identifier for logging and tracing.
+         version: Version of the monitoring package.
+         environment: Deployment environment (e.g., production, staging).
+         application_name: Name of the application using the VertexAI API.
+         tracer: OpenTelemetry tracer for creating spans.
+         pricing_info: Information used for calculating the cost of VertexAI usage.
+         trace_content: Flag indicating whether to trace the actual content.
+
+     Returns:
+         A function that wraps the chat method to add telemetry.
+     """
+
+     async def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the chat 'send_message_streaming' API call to add telemetry.
+
+         This collects metrics such as execution time, cost, and token usage, and handles
+         errors gracefully, adding details to the trace for observability.
+
+         Args:
+             wrapped: The original 'send_message_streaming' method to be wrapped.
+             instance: The instance of the class where the original method is defined.
+             args: Positional arguments for the 'send_message_streaming' method.
+             kwargs: Keyword arguments for the 'send_message_streaming' method.
+
+         Returns:
+             The response from the original 'send_message_streaming' method.
+         """
+
+         # Special handling for streaming responses to accommodate the nature of data flow
+         async def stream_generator():
+             with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+                 # Placeholder for aggregating the streaming response
+                 llmresponse = ""
+
+                 # Loop through streaming events, capturing relevant details
+                 async for event in wrapped(*args, **kwargs):
+                     llmresponse += str(event.text)
+                     yield event
+
+                 # Handle exceptions to ensure observability without disrupting the operation
+                 try:
+                     prompt = args[0]
+
+                     # Approximate token counts (roughly four characters per token)
+                     prompt_tokens = math.ceil(len(prompt) / 4)
+                     completion_tokens = math.ceil(len(llmresponse) / 4)
+                     total_tokens = prompt_tokens + completion_tokens
+
+                     model = instance._model._model_id
+
+                     # Calculate cost of the operation
+                     cost = get_chat_model_cost(model,
+                                                pricing_info, prompt_tokens,
+                                                completion_tokens)
+
+                     # Set Span attributes
+                     span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                     span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                        SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI)
+                     span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                        SemanticConvetion.GEN_AI_TYPE_CHAT)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                        gen_ai_endpoint)
+                     span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                        environment)
+                     span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                        application_name)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                        model)
+                     span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                        True)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                        prompt_tokens)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                        completion_tokens)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                        total_tokens)
+                     span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                        cost)
+                     if trace_content:
+                         span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                            prompt)
+                         span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
+                                            llmresponse)
+
+                     span.set_status(Status(StatusCode.OK))
+
+                     if disable_metrics is False:
+                         attributes = {
+                             TELEMETRY_SDK_NAME:
+                                 "openlit",
+                             SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                                 application_name,
+                             SemanticConvetion.GEN_AI_SYSTEM:
+                                 SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
+                             SemanticConvetion.GEN_AI_ENVIRONMENT:
+                                 environment,
+                             SemanticConvetion.GEN_AI_TYPE:
+                                 SemanticConvetion.GEN_AI_TYPE_CHAT,
+                             SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                                 model
+                         }
+
+                         metrics["genai_requests"].add(1, attributes)
+                         metrics["genai_total_tokens"].add(total_tokens, attributes)
+                         metrics["genai_completion_tokens"].add(completion_tokens, attributes)
+                         metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                         metrics["genai_cost"].record(cost, attributes)
+
+                 except Exception as e:
+                     handle_exception(span, e)
+                     logger.error("Error in trace creation: %s", e)
+
+         return stream_generator()
+
+     return wrapper
+
+ def embeddings_async(gen_ai_endpoint, version, environment, application_name, tracer,
+                      pricing_info, trace_content, metrics, disable_metrics):
+     """
+     Generates a telemetry wrapper for embeddings to collect metrics.
+
+     Args:
+         gen_ai_endpoint: Endpoint identifier for logging and tracing.
+         version: Version of the monitoring package.
+         environment: Deployment environment (e.g., production, staging).
+         application_name: Name of the application using the VertexAI API.
+         tracer: OpenTelemetry tracer for creating spans.
+         pricing_info: Information used for calculating the cost of VertexAI usage.
+         trace_content: Flag indicating whether to trace the actual content.
+
+     Returns:
+         A function that wraps the embeddings method to add telemetry.
+     """
+
+     async def wrapper(wrapped, instance, args, kwargs):
+         """
+         Wraps the 'get_embeddings' API call to add telemetry.
+
+         This collects metrics such as execution time, cost, and token usage, and handles
+         errors gracefully, adding details to the trace for observability.
+
+         Args:
+             wrapped: The original 'get_embeddings' method to be wrapped.
+             instance: The instance of the class where the original method is defined.
+             args: Positional arguments for the 'get_embeddings' method.
+             kwargs: Keyword arguments for the 'get_embeddings' method.
+
+         Returns:
+             The response from the original 'get_embeddings' method.
+         """
+
+         with tracer.start_as_current_span(gen_ai_endpoint, kind=SpanKind.CLIENT) as span:
+             response = await wrapped(*args, **kwargs)
+
+             try:
+                 prompt = args[0][0]
+
+                 model = instance._model_id
+
+                 prompt_tokens = int(response[0].statistics.token_count)
+
+                 # Calculate cost of the operation
+                 cost = get_embed_model_cost(model,
+                                             pricing_info, prompt_tokens)
+
+                 # Set Span attributes
+                 span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                 span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                    SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI)
+                 span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                    SemanticConvetion.GEN_AI_TYPE_EMBEDDING)
+                 span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                    gen_ai_endpoint)
+                 span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                    environment)
+                 span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                    application_name)
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                    model)
+                 span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                    False)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                    prompt_tokens)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                    prompt_tokens)
+                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                    cost)
+                 if trace_content:
+                     span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
+                                        prompt)
+
+                 span.set_status(Status(StatusCode.OK))
+
+                 if disable_metrics is False:
+                     attributes = {
+                         TELEMETRY_SDK_NAME:
+                             "openlit",
+                         SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                             application_name,
+                         SemanticConvetion.GEN_AI_SYSTEM:
+                             SemanticConvetion.GEN_AI_SYSTEM_VERTEXAI,
+                         SemanticConvetion.GEN_AI_ENVIRONMENT:
+                             environment,
+                         SemanticConvetion.GEN_AI_TYPE:
+                             SemanticConvetion.GEN_AI_TYPE_EMBEDDING,
+                         SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                             model
+                     }
+
+                     metrics["genai_requests"].add(1, attributes)
+                     metrics["genai_total_tokens"].add(prompt_tokens, attributes)
+                     metrics["genai_prompt_tokens"].add(prompt_tokens, attributes)
+                     metrics["genai_cost"].record(cost, attributes)
+
+                 # Return original response
+                 return response
+
+             except Exception as e:
+                 handle_exception(span, e)
+                 logger.error("Error in trace creation: %s", e)
+
+                 # Return original response
+                 return response
+
+     return wrapper
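
For context: the (wrapped, instance, args, kwargs) signature of these wrapper factories follows the wrapt convention, so they are presumably attached to the VertexAI SDK via wrapt's monkey-patching helper. A minimal wiring sketch, assuming wrapt is used; the module path, method name, endpoint label, and surrounding objects below are illustrative assumptions, not taken from this diff:

    # Hypothetical wiring of one wrapper factory. The dotted paths and the
    # endpoint label are assumptions for illustration only.
    import wrapt

    wrapt.wrap_function_wrapper(
        "vertexai.generative_models",               # assumed module path
        "GenerativeModel.generate_content_async",   # assumed method name
        generate_content_async(
            gen_ai_endpoint="vertexai.generate_content",  # assumed endpoint label
            version="1.5.0",
            environment="production",
            application_name="my-app",
            tracer=tracer,              # an OpenTelemetry tracer created elsewhere
            pricing_info=pricing_info,  # pricing table consumed by get_chat_model_cost
            trace_content=True,
            metrics=metrics,            # dict of OpenTelemetry counters and histograms
            disable_metrics=False,
        ),
    )

Once patched this way, each call to the target async method flows through the returned wrapper, which starts a CLIENT span, forwards the call, and records token, cost, and (optionally) content attributes before handing back the original response.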