openlit 1.16.2__py3-none-any.whl → 1.18.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- openlit/__init__.py +3 -0
- openlit/instrumentation/anthropic/anthropic.py +28 -10
- openlit/instrumentation/anthropic/async_anthropic.py +27 -10
- openlit/instrumentation/bedrock/bedrock.py +13 -5
- openlit/instrumentation/cohere/cohere.py +33 -12
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +6 -2
- openlit/instrumentation/elevenlabs/elevenlabs.py +6 -2
- openlit/instrumentation/gpt4all/gpt4all.py +30 -10
- openlit/instrumentation/groq/async_groq.py +31 -11
- openlit/instrumentation/groq/groq.py +31 -11
- openlit/instrumentation/mistral/async_mistral.py +33 -12
- openlit/instrumentation/mistral/mistral.py +33 -12
- openlit/instrumentation/ollama/async_ollama.py +57 -20
- openlit/instrumentation/ollama/ollama.py +57 -20
- openlit/instrumentation/openai/async_azure_openai.py +94 -35
- openlit/instrumentation/openai/async_openai.py +68 -27
- openlit/instrumentation/openai/azure_openai.py +89 -31
- openlit/instrumentation/openai/openai.py +68 -29
- openlit/instrumentation/transformers/transformers.py +20 -16
- openlit/instrumentation/vertexai/async_vertexai.py +104 -35
- openlit/instrumentation/vertexai/vertexai.py +104 -35
- openlit/instrumentation/vllm/__init__.py +43 -0
- openlit/instrumentation/vllm/vllm.py +143 -0
- openlit/semcov/__init__.py +4 -1
- {openlit-1.16.2.dist-info → openlit-1.18.0.dist-info}/METADATA +3 -1
- {openlit-1.16.2.dist-info → openlit-1.18.0.dist-info}/RECORD +28 -26
- {openlit-1.16.2.dist-info → openlit-1.18.0.dist-info}/LICENSE +0 -0
- {openlit-1.16.2.dist-info → openlit-1.18.0.dist-info}/WHEEL +0 -0
openlit/instrumentation/vertexai/async_vertexai.py
CHANGED

@@ -103,10 +103,18 @@ def generate_content_async(gen_ai_endpoint, version, environment, application_na
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
                 if trace_content:
-                    span.
-
-
-
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -181,10 +189,19 @@ def generate_content_async(gen_ai_endpoint, version, environment, application_na
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
                 if trace_content:
-                    span.
-
-
-
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            # pylint: disable=line-too-long
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.candidates[0].content.parts[0].text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -316,10 +333,18 @@ def send_message_async(gen_ai_endpoint, version, environment, application_name,
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
                 if trace_content:
-                    span.
-
-
-
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -394,11 +419,19 @@ def send_message_async(gen_ai_endpoint, version, environment, application_name,
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
                 if trace_content:
-                    span.
-
-
-
-
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            # pylint: disable=line-too-long
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.candidates[0].content.parts[0].text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -516,10 +549,18 @@ def predict_async(gen_ai_endpoint, version, environment, application_name, trace
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
                 if trace_content:
-                    span.
-
-
-
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -648,10 +689,18 @@ def predict_streaming_async(gen_ai_endpoint, version, environment, application_n
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
                 if trace_content:
-                    span.
-
-
-
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -765,10 +814,18 @@ def start_chat_async(gen_ai_endpoint, version, environment, application_name, tr
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
                 if trace_content:
-                    span.
-
-
-
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -895,10 +952,18 @@ def start_chat_streaming_async(gen_ai_endpoint, version, environment, applicatio
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
                 if trace_content:
-                    span.
-
-
-
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -1006,8 +1071,12 @@ def embeddings_async(gen_ai_endpoint, version, environment, application_name, tr
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
                 if trace_content:
-                    span.
-
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
openlit/instrumentation/vertexai/vertexai.py
CHANGED

@@ -103,10 +103,18 @@ def generate_content(gen_ai_endpoint, version, environment, application_name, tr
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
                 if trace_content:
-                    span.
-
-
-
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -181,10 +189,19 @@ def generate_content(gen_ai_endpoint, version, environment, application_name, tr
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
                 if trace_content:
-                    span.
-
-
-
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            # pylint: disable=line-too-long
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.candidates[0].content.parts[0].text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -316,10 +333,18 @@ def send_message(gen_ai_endpoint, version, environment, application_name, tracer
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
                 if trace_content:
-                    span.
-
-
-
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -394,11 +419,19 @@ def send_message(gen_ai_endpoint, version, environment, application_name, tracer
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
                 if trace_content:
-                    span.
-
-
-
-
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            # pylint: disable=line-too-long
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.candidates[0].content.parts[0].text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -516,10 +549,18 @@ def predict(gen_ai_endpoint, version, environment, application_name, tracer,
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
                 if trace_content:
-                    span.
-
-
-
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -648,10 +689,18 @@ def predict_streaming(gen_ai_endpoint, version, environment, application_name, t
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
                 if trace_content:
-                    span.
-
-
-
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -765,10 +814,18 @@ def start_chat(gen_ai_endpoint, version, environment, application_name, tracer,
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
                 if trace_content:
-                    span.
-
-
-
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: response.text,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -895,10 +952,18 @@ def start_chat_streaming(gen_ai_endpoint, version, environment, application_name
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
                 if trace_content:
-                    span.
-
-
-
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_COMPLETION: llmresponse,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
@@ -1006,8 +1071,12 @@ def embeddings(gen_ai_endpoint, version, environment, application_name, tracer,
                 span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
                                     cost)
                 if trace_content:
-                    span.
-
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes={
+                            SemanticConvetion.GEN_AI_CONTENT_PROMPT: prompt,
+                        },
+                    )
 
                 span.set_status(Status(StatusCode.OK))
 
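Both VertexAI modules (async and sync) receive the same change: when `trace_content` is enabled, prompt and completion text are now recorded as span events rather than flat span attributes. The removed lines are truncated in this diff view (they begin with `span.`), but they are replaced one-for-one by `span.add_event` calls. A minimal, self-contained sketch of the new pattern, using the literal event and attribute names that the `semcov` change further down introduces:

```python
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider

trace.set_tracer_provider(TracerProvider())
tracer = trace.get_tracer(__name__)

prompt = "What is OpenTelemetry?"
llmresponse = "OpenTelemetry is an observability framework."

with tracer.start_as_current_span("vertexai.generate_content") as span:
    # 1.18.0 pattern: content attached as timestamped span events, named by
    # the new GEN_AI_CONTENT_*_EVENT constants ("gen_ai.content.prompt" and
    # "gen_ai.content.completion"), instead of plain span attributes
    span.add_event(
        name="gen_ai.content.prompt",
        attributes={"gen_ai.prompt": prompt},
    )
    span.add_event(
        name="gen_ai.content.completion",
        attributes={"gen_ai.completion": llmresponse},
    )
```

Events carry their own timestamps and can be dropped or redacted independently of the span's metric attributes, which is presumably why content capture moved there.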
openlit/instrumentation/vllm/__init__.py
ADDED

@@ -0,0 +1,43 @@
+# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
+"""Initializer of Auto Instrumentation of vLLM Functions"""
+
+from typing import Collection
+import importlib.metadata
+from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
+from wrapt import wrap_function_wrapper
+
+from openlit.instrumentation.vllm.vllm import (
+    generate
+)
+
+_instruments = ("vllm >= 0.5.4",)
+
+class VLLMInstrumentor(BaseInstrumentor):
+    """
+    An instrumentor for vLLM's client library.
+    """
+
+    def instrumentation_dependencies(self) -> Collection[str]:
+        return _instruments
+
+    def _instrument(self, **kwargs):
+        application_name = kwargs.get("application_name", "default_application")
+        environment = kwargs.get("environment", "default_environment")
+        tracer = kwargs.get("tracer")
+        metrics = kwargs.get("metrics_dict")
+        pricing_info = kwargs.get("pricing_info", {})
+        trace_content = kwargs.get("trace_content", False)
+        disable_metrics = kwargs.get("disable_metrics")
+        version = importlib.metadata.version("vllm")
+
+        # sync chat
+        wrap_function_wrapper(
+            "vllm",
+            "LLM.generate",
+            generate("vllm.generate", version, environment, application_name,
+                     tracer, pricing_info, trace_content, metrics, disable_metrics),
+        )
+
+    def _uninstrument(self, **kwargs):
+        # Proper uninstrumentation logic to revert patched methods
+        pass
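A sketch of wiring the new instrumentor up by hand; normally `openlit.init()` handles this. `BaseInstrumentor.instrument(**kwargs)` forwards its keyword arguments to `_instrument`, so the argument names below mirror the `kwargs.get(...)` calls above. The tracer setup and the application/environment names are illustrative assumptions, not values from this release:

```python
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider

from openlit.instrumentation.vllm import VLLMInstrumentor

trace.set_tracer_provider(TracerProvider())

# kwargs are forwarded to _instrument(); disable_metrics=True avoids needing
# a metrics_dict in this minimal setup
VLLMInstrumentor().instrument(
    application_name="demo-app",
    environment="dev",
    tracer=trace.get_tracer(__name__),
    pricing_info={},
    trace_content=True,
    disable_metrics=True,
)

# From here on, vllm.LLM.generate is patched to emit "vllm.generate" spans:
# from vllm import LLM
# llm = LLM(model="facebook/opt-125m")
# outputs = llm.generate(["Hello, my name is"])
```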
openlit/instrumentation/vllm/vllm.py
ADDED

@@ -0,0 +1,143 @@
+# pylint: disable=duplicate-code, broad-exception-caught, too-many-statements, unused-argument, possibly-used-before-assignment
+"""
+Module for monitoring vLLM API calls.
+"""
+
+import logging
+from opentelemetry.trace import SpanKind, Status, StatusCode
+from opentelemetry.sdk.resources import TELEMETRY_SDK_NAME
+from openlit.__helpers import handle_exception, general_tokens
+from openlit.semcov import SemanticConvetion
+
+# Initialize logger for logging potential issues and operations
+logger = logging.getLogger(__name__)
+
+def generate(gen_ai_endpoint, version, environment, application_name,
+             tracer, pricing_info, trace_content, metrics, disable_metrics):
+    """
+    Generates a telemetry wrapper for generate to collect metrics.
+
+    Args:
+        gen_ai_endpoint: Endpoint identifier for logging and tracing.
+        version: Version of the monitoring package.
+        environment: Deployment environment (e.g., production, staging).
+        application_name: Name of the application using the vLLM API.
+        tracer: OpenTelemetry tracer for creating spans.
+        pricing_info: Information used for calculating the cost of vLLM usage.
+        trace_content: Flag indicating whether to trace the actual content.
+
+    Returns:
+        A function that wraps the generate method to add telemetry.
+    """
+
+    def wrapper(wrapped, instance, args, kwargs):
+        """
+        Wraps the 'generate' API call to add telemetry.
+
+        This collects metrics such as execution time, cost, and token usage, and handles errors
+        gracefully, adding details to the trace for observability.
+
+        Args:
+            wrapped: The original 'generate' method to be wrapped.
+            instance: The instance of the class where the original method is defined.
+            args: Positional arguments for the 'generate' method.
+            kwargs: Keyword arguments for the 'generate' method.
+
+        Returns:
+            The response from the original 'generate' method.
+        """
+
+        # pylint: disable=line-too-long
+        with tracer.start_as_current_span(gen_ai_endpoint, kind= SpanKind.CLIENT) as span:
+            response = wrapped(*args, **kwargs)
+
+            try:
+                model = instance.llm_engine.model_config.model or "facebook/opt-125m"
+                # Set base span attribues
+                span.set_attribute(TELEMETRY_SDK_NAME, "openlit")
+                span.set_attribute(SemanticConvetion.GEN_AI_SYSTEM,
+                                    SemanticConvetion.GEN_AI_SYSTEM_VLLM)
+                span.set_attribute(SemanticConvetion.GEN_AI_TYPE,
+                                    SemanticConvetion.GEN_AI_TYPE_CHAT)
+                span.set_attribute(SemanticConvetion.GEN_AI_ENDPOINT,
+                                    gen_ai_endpoint)
+                span.set_attribute(SemanticConvetion.GEN_AI_ENVIRONMENT,
+                                    environment)
+                span.set_attribute(SemanticConvetion.GEN_AI_APPLICATION_NAME,
+                                    application_name)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_MODEL,
+                                    model)
+                span.set_attribute(SemanticConvetion.GEN_AI_REQUEST_IS_STREAM,
+                                    False)
+                input_tokens = 0
+                output_tokens = 0
+                cost = 0
+
+                if trace_content:
+                    prompt_attributes = {}
+                    completion_attributes = {}
+
+                    for i, output in enumerate(response):
+                        prompt_attributes[f"{SemanticConvetion.GEN_AI_CONTENT_PROMPT}.{i}"] = output.prompt
+                        completion_attributes[f"{SemanticConvetion.GEN_AI_CONTENT_COMPLETION}.{i}"] = output.outputs[0].text
+                        input_tokens += general_tokens(output.prompt)
+                        output_tokens += general_tokens(output.outputs[0].text)
+
+                    # Add a single event for all prompts
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_PROMPT_EVENT,
+                        attributes=prompt_attributes,
+                    )
+
+                    # Add a single event for all completions
+                    span.add_event(
+                        name=SemanticConvetion.GEN_AI_CONTENT_COMPLETION_EVENT,
+                        attributes=completion_attributes,
+                    )
+
+                total_tokens = input_tokens + output_tokens
+
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_PROMPT_TOKENS,
+                                    input_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COMPLETION_TOKENS,
+                                    output_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_TOTAL_TOKENS,
+                                    total_tokens)
+                span.set_attribute(SemanticConvetion.GEN_AI_USAGE_COST,
+                                    cost)
+
+                span.set_status(Status(StatusCode.OK))
+
+                if disable_metrics is False:
+                    attributes = {
+                        TELEMETRY_SDK_NAME:
+                            "openlit",
+                        SemanticConvetion.GEN_AI_APPLICATION_NAME:
+                            application_name,
+                        SemanticConvetion.GEN_AI_SYSTEM:
+                            SemanticConvetion.GEN_AI_SYSTEM_VLLM,
+                        SemanticConvetion.GEN_AI_ENVIRONMENT:
+                            environment,
+                        SemanticConvetion.GEN_AI_TYPE:
+                            SemanticConvetion.GEN_AI_TYPE_CHAT,
+                        SemanticConvetion.GEN_AI_REQUEST_MODEL:
+                            model
+                    }
+
+                    metrics["genai_requests"].add(1, attributes)
+                    metrics["genai_total_tokens"].add(total_tokens, attributes)
+                    metrics["genai_completion_tokens"].add(output_tokens, attributes)
+                    metrics["genai_prompt_tokens"].add(input_tokens, attributes)
+                    metrics["genai_cost"].record(cost, attributes)
+
+                # Return original response
+                return response
+
+            except Exception as e:
+                handle_exception(span, e)
+                logger.error("Error in trace creation: %s", e)
+
+                # Return original response
+                return response
+
+    return wrapper
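Because `LLM.generate` accepts a batch of prompts, the wrapper folds every request into one prompt event and one completion event, indexing the attribute keys by position rather than emitting one event per output. A small illustration of the resulting attribute layout, with hypothetical `(prompt, completion)` pairs standing in for vLLM's `RequestOutput` objects:

```python
# Stand-ins for response[i].prompt and response[i].outputs[0].text
outputs = [
    ("Hello,", " world!"),
    ("What is 2 + 2?", " 4."),
]

prompt_attributes = {}
completion_attributes = {}
for i, (prompt, completion) in enumerate(outputs):
    # Keys follow f"{GEN_AI_CONTENT_PROMPT}.{i}" / f"{GEN_AI_CONTENT_COMPLETION}.{i}"
    prompt_attributes[f"gen_ai.prompt.{i}"] = prompt
    completion_attributes[f"gen_ai.completion.{i}"] = completion

print(prompt_attributes)
# {'gen_ai.prompt.0': 'Hello,', 'gen_ai.prompt.1': 'What is 2 + 2?'}
print(completion_attributes)
# {'gen_ai.completion.0': ' world!', 'gen_ai.completion.1': ' 4.'}
```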
openlit/semcov/__init__.py
CHANGED
@@ -70,7 +70,9 @@ class SemanticConvetion:
     GEN_AI_RESPONSE_IMAGE = "gen_ai.response.image" # Not used directly in code yet
 
     # GenAI Content
+    GEN_AI_CONTENT_PROMPT_EVENT = "gen_ai.content.prompt"
     GEN_AI_CONTENT_PROMPT = "gen_ai.prompt"
+    GEN_AI_CONTENT_COMPLETION_EVENT = "gen_ai.content.completion"
     GEN_AI_CONTENT_COMPLETION = "gen_ai.completion"
     GEN_AI_CONTENT_REVISED_PROMPT = "gen_ai.content.revised_prompt"
 
@@ -94,11 +96,12 @@ class SemanticConvetion:
     GEN_AI_SYSTEM_COHERE = "cohere"
     GEN_AI_SYSTEM_MISTRAL = "mistral"
     GEN_AI_SYSTEM_BEDROCK = "bedrock"
-    GEN_AI_SYSTEM_VERTEXAI = "
+    GEN_AI_SYSTEM_VERTEXAI = "vertex_ai"
     GEN_AI_SYSTEM_GROQ = "groq"
     GEN_AI_SYSTEM_OLLAMA = "ollama"
     GEN_AI_SYSTEM_GPT4ALL = "gpt4all"
     GEN_AI_SYSTEM_ELEVENLABS = "elevenlabs"
+    GEN_AI_SYSTEM_VLLM = "vLLM"
     GEN_AI_SYSTEM_LANGCHAIN = "langchain"
     GEN_AI_SYSTEM_LLAMAINDEX = "llama_index"
     GEN_AI_SYSTEM_HAYSTACK = "haystack"
{openlit-1.16.2.dist-info → openlit-1.18.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: openlit
-Version: 1.16.2
+Version: 1.18.0
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications, facilitating the integration of observability into your GenAI-driven projects
 Home-page: https://github.com/openlit/openlit/tree/main/openlit/python
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT
@@ -68,6 +68,8 @@ This project adheres to the [Semantic Conventions](https://github.com/open-telem
 | [✅ Vertex AI](https://docs.openlit.io/latest/integrations/vertexai) | | | |
 | [✅ Groq](https://docs.openlit.io/latest/integrations/groq) | | | |
 | [✅ ElevenLabs](https://docs.openlit.io/latest/integrations/elevenlabs) | | | |
+| [✅ vLLM](https://docs.openlit.io/latest/integrations/vllm) | | | |
+
 ## Supported Destinations
 - [✅ OpenTelemetry Collector](https://docs.openlit.io/latest/connections/otelcol)
 - [✅ Prometheus + Tempo](https://docs.openlit.io/latest/connections/prometheus-tempo)