openlit 1.16.2__py3-none-any.whl → 1.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. openlit/__init__.py +3 -0
  2. openlit/instrumentation/anthropic/anthropic.py +28 -10
  3. openlit/instrumentation/anthropic/async_anthropic.py +27 -10
  4. openlit/instrumentation/bedrock/bedrock.py +13 -5
  5. openlit/instrumentation/cohere/cohere.py +33 -12
  6. openlit/instrumentation/elevenlabs/async_elevenlabs.py +6 -2
  7. openlit/instrumentation/elevenlabs/elevenlabs.py +6 -2
  8. openlit/instrumentation/gpt4all/gpt4all.py +30 -10
  9. openlit/instrumentation/groq/async_groq.py +31 -11
  10. openlit/instrumentation/groq/groq.py +31 -11
  11. openlit/instrumentation/mistral/async_mistral.py +33 -12
  12. openlit/instrumentation/mistral/mistral.py +33 -12
  13. openlit/instrumentation/ollama/async_ollama.py +57 -20
  14. openlit/instrumentation/ollama/ollama.py +57 -20
  15. openlit/instrumentation/openai/async_azure_openai.py +94 -35
  16. openlit/instrumentation/openai/async_openai.py +68 -27
  17. openlit/instrumentation/openai/azure_openai.py +89 -31
  18. openlit/instrumentation/openai/openai.py +68 -29
  19. openlit/instrumentation/transformers/transformers.py +20 -16
  20. openlit/instrumentation/vertexai/async_vertexai.py +104 -35
  21. openlit/instrumentation/vertexai/vertexai.py +104 -35
  22. openlit/instrumentation/vllm/__init__.py +43 -0
  23. openlit/instrumentation/vllm/vllm.py +143 -0
  24. openlit/semcov/__init__.py +4 -1
  25. {openlit-1.16.2.dist-info → openlit-1.18.0.dist-info}/METADATA +3 -1
  26. {openlit-1.16.2.dist-info → openlit-1.18.0.dist-info}/RECORD +28 -26
  27. {openlit-1.16.2.dist-info → openlit-1.18.0.dist-info}/LICENSE +0 -0
  28. {openlit-1.16.2.dist-info → openlit-1.18.0.dist-info}/WHEEL +0 -0
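The recurring change across the instrumentation modules below: prompt and completion text is no longer written onto spans as the `gen_ai.prompt` / `gen_ai.completion` attributes, but attached as span events named `gen_ai.content.prompt` and `gen_ai.content.completion` (see the new `*_EVENT` constants in `openlit/semcov/__init__.py`). Backends that queried those span attributes will need to read span events instead. A minimal sketch of the new pattern against the OpenTelemetry Python API — the tracer setup and literal strings here are illustrative, not openlit's own code:

```python
from opentelemetry import trace

tracer = trace.get_tracer("example")

with tracer.start_as_current_span("chat") as span:
    # Old pattern (1.16.2): content stored directly as span attributes
    # span.set_attribute("gen_ai.prompt", prompt)

    # New pattern: content attached as span events, keeping large
    # payloads out of the span's attribute set
    span.add_event(
        name="gen_ai.content.prompt",
        attributes={"gen_ai.prompt": "What is OpenTelemetry?"},
    )
    span.add_event(
        name="gen_ai.content.completion",
        attributes={"gen_ai.completion": "An observability framework..."},
    )
```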
openlit/instrumentation/vertexai/async_vertexai.py
@@ -103,10 +103,18 @@ def generate_content_async(gen_ai_endpoint, version, environment, application_na
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                       llmresponse)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -181,10 +189,19 @@ def generate_content_async(gen_ai_endpoint, version, environment, application_na
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    # pylint: disable=line-too-long
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION, response.candidates[0].content.parts[0].text)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            # pylint: disable=line-too-long
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.candidates[0].content.parts[0].text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -316,10 +333,18 @@ def send_message_async(gen_ai_endpoint, version, environment, application_name,
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                       llmresponse)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -394,11 +419,19 @@ def send_message_async(gen_ai_endpoint, version, environment, application_name,
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    # pylint: disable=line-too-long
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                       response.candidates[0].content.parts[0].text)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            # pylint: disable=line-too-long
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.candidates[0].content.parts[0].text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -516,10 +549,18 @@ def predict_async(gen_ai_endpoint, version, environment, application_name, trace
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    # pylint: disable=line-too-long
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION, response.text)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -648,10 +689,18 @@ def predict_streaming_async(gen_ai_endpoint, version, environment, application_n
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                       llmresponse)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -765,10 +814,18 @@ def start_chat_async(gen_ai_endpoint, version, environment, application_name, tr
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    # pylint: disable=line-too-long
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION, response.text)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -895,10 +952,18 @@ def start_chat_streaming_async(gen_ai_endpoint, version, environment, applicatio
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                       llmresponse)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -1006,8 +1071,12 @@ def embeddings_async(gen_ai_endpoint, version, environment, application_name, tr
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
openlit/instrumentation/vertexai/vertexai.py
@@ -103,10 +103,18 @@ def generate_content(gen_ai_endpoint, version, environment, application_name, tr
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                       llmresponse)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -181,10 +189,19 @@ def generate_content(gen_ai_endpoint, version, environment, application_name, tr
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    # pylint: disable=line-too-long
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION, response.candidates[0].content.parts[0].text)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            # pylint: disable=line-too-long
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.candidates[0].content.parts[0].text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -316,10 +333,18 @@ def send_message(gen_ai_endpoint, version, environment, application_name, tracer
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                       llmresponse)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -394,11 +419,19 @@ def send_message(gen_ai_endpoint, version, environment, application_name, tracer
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    # pylint: disable=line-too-long
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                       response.candidates[0].content.parts[0].text)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            # pylint: disable=line-too-long
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.candidates[0].content.parts[0].text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -516,10 +549,18 @@ def predict(gen_ai_endpoint, version, environment, application_name, tracer,
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    # pylint: disable=line-too-long
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION, response.text)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -648,10 +689,18 @@ def predict_streaming(gen_ai_endpoint, version, environment, application_name, t
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                       llmresponse)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -765,10 +814,18 @@ def start_chat(gen_ai_endpoint, version, environment, application_name, tracer,
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    # pylint: disable=line-too-long
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION, response.text)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -895,10 +952,18 @@ def start_chat_streaming(gen_ai_endpoint, version, environment, application_name
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_COMPLETION,
-                                       llmresponse)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -1006,8 +1071,12 @@ def embeddings(gen_ai_endpoint, version, environment, application_name, tracer,
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                    cost)
                 if trace_content:
-                    span.set_attribute(SemanticConvetion.GEN_AI_CONTENT_PROMPT,
-                                       prompt)
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
openlit/instrumentation/vllm/__init__.py
@@ -0,0 +1,43 @@
+# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
+"""Initializer of Auto Instrumentation of vLLM Functions"""
+
+from typing import Collection
+import importlib.metadata
+from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
+from wrapt import wrap_function_wrapper
+
+from openlit.instrumentation.vllm.vllm import (
+    generate
+)
+
+_instruments = ("vllm >= 0.5.4",)
+
+class VLLMInstrumentor(BaseInstrumentor):
+    """
+    An instrumentor for vLLM's client library.
+    """
+
+    def instrumentation_dependencies(self) -> Collection[str]:
+        return _instruments
+
+    def _instrument(self, **kwargs):
+        application_name = kwargs.get("application_name", "default_application")
+        environment = kwargs.get("environment", "default_environment")
+        tracer = kwargs.get("tracer")
+        metrics = kwargs.get("metrics_dict")
+        pricing_info = kwargs.get("pricing_info", {})
+        trace_content = kwargs.get("trace_content", False)
+        disable_metrics = kwargs.get("disable_metrics")
+        version = importlib.metadata.version("vllm")
+
+        # sync chat
+        wrap_function_wrapper(
+            "vllm",
+            "LLM.generate",
+            generate("vllm.generate", version, environment, application_name,
+                     tracer, pricing_info, trace_content, metrics, disable_metrics),
+        )
+
+    def _uninstrument(self, **kwargs):
+        # Proper uninstrumentation logic to revert patched methods
+        pass
openlit/instrumentation/vllm/vllm.py
@@ -0,0 +1,143 @@
+# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
+"""
+Module for monitoring vLLM API calls.
+"""
+
+import logging
+from opentelemetry.trace import SpanKind, Status, StatusCode
+from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from openlit.__helpers import handle_exception, general_tokens
+from openlit.semcov import SemanticConvetion
+
+# Initialize logger for logging potential issues and operations
+logger = logging.getLogger(__name__)
+
+def generate(gen_ai_endpoint, version, environment, application_name,
+             tracer, pricing_info, trace_content, metrics, disable_metrics):
+    """
+    Generates a telemetry wrapper for generate to collect metrics.
+
+    Args:
+        gen_ai_endpoint: Endpoint identifier for logging and tracing.
+        version: Version of the monitoring package.
+        environment: Deployment environment (e.g., production, staging).
+        application_name: Name of the application using the vLLM API.
+        tracer: OpenTelemetry tracer for creating spans.
+        pricing_info: Information used for calculating the cost of vLLM usage.
+        trace_content: Flag indicating whether to trace the actual content.
+
+    Returns:
+        A function that wraps the generate method to add telemetry.
+    """
+
+    def wrapper(wrapped, instance, args, kwargs):
+        """
+        Wraps the 'generate' API call to add telemetry.
+
+        This collects metrics such as execution time, cost, and token usage, and handles errors
+        gracefully, adding details to the trace for observability.
+
+        Args:
+            wrapped: The original 'generate' method to be wrapped.
+            instance: The instance of the class where the original method is defined.
+            args: Positional arguments for the 'generate' method.
+            kwargs: Keyword arguments for the 'generate' method.
+
+        Returns:
+            The response from the original 'generate' method.
+        """
+
+        # pylint: disable=line-too-long
+        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+            response = wrapped(*args, **kwargs)
+
+            try:
+                model = instance.llm_engine.model_config.model or "facebook/opt-125m"
+                # Set base span attribues
+                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                   SemanticConvetion.GEN_AI_SYSTEM_VLLM)
+                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                   SemanticConvetion.GEN_AI_TYPE_CHAT)
+                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                   gen_ai_endpoint)
+                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                   environment)
+                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                   application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                   model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                   False)
+                input_tokens = 0
+                output_tokens = 0
+                cost = 0
+
+                if trace_content:
+                    prompt_attributes = {}
+                    completion_attributes = {}
+
+                    for i, output in enumerate(response):
+                        prompt_attributes[f"{SemanticConvetion.GEN_AI_CONTENT_PROMPT}.{i}"] = output.prompt
+                        completion_attributes[f"{SemanticConvetion.GEN_AI_CONTENT_COMPLETION}.{i}"] = output.outputs[0].text
+                        input_tokens += general_tokens(output.prompt)
+                        output_tokens += general_tokens(output.outputs[0].text)
+
+                    # Add a single event for all prompts
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes=prompt_attributes,
+                    )
+
+                    # Add a single event for all completions
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes=completion_attributes,
+                    )
+
+                total_tokens = input_tokens + output_tokens
+
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                   input_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                   output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                   total_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                   cost)
+
+                span.set_status(Status(StatusCode.OK))
+
+                if disable_metrics is False:
+                    attributes = {
+                        TELEMETRY_SDK_NAME:
+                            "openlit",
+                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                            application_name,
+                        SemanticConvetion.GEN_AI_SYSTEM:
+                            SemanticConvetion.GEN_AI_SYSTEM_VLLM,
+                        SemanticConvetion.GEN_AI_ENVIRONMENT:
+                            environment,
+                        SemanticConvetion.GEN_AI_TYPE:
+                            SemanticConvetion.GEN_AI_TYPE_CHAT,
+                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                            model
+                    }
+
+                    metrics["genai_requests"].add(1, attributes)
+                    metrics["genai_total_tokens"].add(total_tokens, attributes)
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                    metrics["genai_cost"].record(cost, attributes)
+
+                # Return original response
+                return response
+
+            except Exception as e:
+                handle_exception(span, e)
+                logger.error("Error in trace creation: %s", e)
+
+                # Return original response
+                return response
+
+    return wrapper
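With `LLM.generate` wrapped, telemetry is emitted transparently on each call. A hedged usage sketch — assuming openlit registers `VLLMInstrumentor` through its standard `openlit.init()` entry point and that the model weights are available locally:

```python
import openlit
from vllm import LLM

# Sets up tracing/metrics and auto-instruments supported libraries
openlit.init(application_name="vllm-demo", environment="dev")

# LLM.generate is now wrapped by the generate() decorator above
llm = LLM(model="facebook/opt-125m")
outputs = llm.generate(["Hello, my name is"])
for output in outputs:
    print(output.outputs[0].text)
```

As the wrapper shows, token counts are estimated with openlit's `general_tokens` helper rather than read from vLLM itself, and `cost` is recorded as 0 since `pricing_info` is never consulted for this local-inference path.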
openlit/semcov/__init__.py
@@ -70,7 +70,9 @@ class SemanticConvetion:
     GEN_AI_RESPONSE_IMAGE = "gen_ai.response.image" # Not used directly in code yet
 
     # GenAI Content
+    GEN_AI_CONTENT_PROMPT_EVENT = "gen_ai.content.prompt"
     GEN_AI_CONTENT_PROMPT = "gen_ai.prompt"
+    GEN_AI_CONTENT_COMPLETION_EVENT = "gen_ai.content.completion"
     GEN_AI_CONTENT_COMPLETION = "gen_ai.completion"
     GEN_AI_CONTENT_REVISED_PROMPT = "gen_ai.content.revised_prompt"
 
@@ -94,11 +96,12 @@ class SemanticConvetion:
     GEN_AI_SYSTEM_COHERE = "cohere"
     GEN_AI_SYSTEM_MISTRAL = "mistral"
     GEN_AI_SYSTEM_BEDROCK = "bedrock"
-    GEN_AI_SYSTEM_VERTEXAI = "vertexai"
+    GEN_AI_SYSTEM_VERTEXAI = "vertex_ai"
     GEN_AI_SYSTEM_GROQ = "groq"
     GEN_AI_SYSTEM_OLLAMA = "ollama"
     GEN_AI_SYSTEM_GPT4ALL = "gpt4all"
     GEN_AI_SYSTEM_ELEVENLABS = "elevenlabs"
+    GEN_AI_SYSTEM_VLLM = "vLLM"
     GEN_AI_SYSTEM_LANGCHAIN = "langchain"
     GEN_AI_SYSTEM_LLAMAINDEX = "llama_index"
     GEN_AI_SYSTEM_HAYSTACK = "haystack"
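Since content now travels as span events, consumers read it back from a span's `events` rather than its attributes. A small self-contained sketch using the OpenTelemetry SDK's in-memory exporter (illustrative only, not part of this package):

```python
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

exporter = InMemorySpanExporter()
provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(exporter))

tracer = provider.get_tracer("example")
with tracer.start_as_current_span("chat") as span:
    span.add_event("gen_ai.content.prompt", {"gen_ai.prompt": "hi"})

# Finished spans expose events, each with .name and .attributes
for finished in exporter.get_finished_spans():
    for event in finished.events:
        if event.name == "gen_ai.content.prompt":
            print(event.attributes["gen_ai.prompt"])
```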
{openlit-1.16.2.dist-info → openlit-1.18.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: openlit
-Version: 1.16.2
+Version: 1.18.0
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications, facilitating the integration of observability into your GenAI-driven projects
 Home-page: https://github.com/openlit/openlit/tree/main/openlit/python
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT
@@ -68,6 +68,8 @@ This project adheres to the [Semantic Conventions](https://github.com/open-telem
 | [✅ Vertex AI](https://docs.openlit.io/latest/integrations/vertexai) | | | |
 | [✅ Groq](https://docs.openlit.io/latest/integrations/groq) | | | |
 | [✅ ElevenLabs](https://docs.openlit.io/latest/integrations/elevenlabs) | | | |
+| [✅ vLLM](https://docs.openlit.io/latest/integrations/vllm) | | | |
+
 ## Supported Destinations
 - [✅ OpenTelemetry Collector](https://docs.openlit.io/latest/connections/otelcol)
 - [✅ Prometheus + Tempo](https://docs.openlit.io/latest/connections/prometheus-tempo)