openlit 1.34.30__py3-none-any.whl → 1.34.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/__helpers.py +235 -86
- openlit/__init__.py +16 -13
- openlit/_instrumentors.py +2 -1
- openlit/evals/all.py +50 -21
- openlit/evals/bias_detection.py +47 -20
- openlit/evals/hallucination.py +53 -22
- openlit/evals/toxicity.py +50 -21
- openlit/evals/utils.py +54 -30
- openlit/guard/all.py +61 -19
- openlit/guard/prompt_injection.py +34 -14
- openlit/guard/restrict_topic.py +46 -15
- openlit/guard/sensitive_topic.py +34 -14
- openlit/guard/utils.py +58 -22
- openlit/instrumentation/ag2/__init__.py +24 -8
- openlit/instrumentation/ag2/ag2.py +34 -13
- openlit/instrumentation/ag2/async_ag2.py +34 -13
- openlit/instrumentation/ag2/utils.py +133 -30
- openlit/instrumentation/ai21/__init__.py +43 -14
- openlit/instrumentation/ai21/ai21.py +47 -21
- openlit/instrumentation/ai21/async_ai21.py +47 -21
- openlit/instrumentation/ai21/utils.py +299 -78
- openlit/instrumentation/anthropic/__init__.py +21 -4
- openlit/instrumentation/anthropic/anthropic.py +28 -17
- openlit/instrumentation/anthropic/async_anthropic.py +28 -17
- openlit/instrumentation/anthropic/utils.py +145 -35
- openlit/instrumentation/assemblyai/__init__.py +11 -2
- openlit/instrumentation/assemblyai/assemblyai.py +15 -4
- openlit/instrumentation/assemblyai/utils.py +120 -25
- openlit/instrumentation/astra/__init__.py +43 -10
- openlit/instrumentation/astra/astra.py +28 -5
- openlit/instrumentation/astra/async_astra.py +28 -5
- openlit/instrumentation/astra/utils.py +151 -55
- openlit/instrumentation/azure_ai_inference/__init__.py +43 -10
- openlit/instrumentation/azure_ai_inference/async_azure_ai_inference.py +53 -21
- openlit/instrumentation/azure_ai_inference/azure_ai_inference.py +53 -21
- openlit/instrumentation/azure_ai_inference/utils.py +307 -83
- openlit/instrumentation/bedrock/__init__.py +21 -4
- openlit/instrumentation/bedrock/bedrock.py +63 -25
- openlit/instrumentation/bedrock/utils.py +139 -30
- openlit/instrumentation/chroma/__init__.py +89 -16
- openlit/instrumentation/chroma/chroma.py +28 -6
- openlit/instrumentation/chroma/utils.py +167 -51
- openlit/instrumentation/cohere/__init__.py +63 -18
- openlit/instrumentation/cohere/async_cohere.py +63 -24
- openlit/instrumentation/cohere/cohere.py +63 -24
- openlit/instrumentation/cohere/utils.py +286 -73
- openlit/instrumentation/controlflow/__init__.py +35 -9
- openlit/instrumentation/controlflow/controlflow.py +66 -33
- openlit/instrumentation/crawl4ai/__init__.py +25 -10
- openlit/instrumentation/crawl4ai/async_crawl4ai.py +78 -31
- openlit/instrumentation/crawl4ai/crawl4ai.py +78 -31
- openlit/instrumentation/crewai/__init__.py +40 -15
- openlit/instrumentation/crewai/async_crewai.py +32 -7
- openlit/instrumentation/crewai/crewai.py +32 -7
- openlit/instrumentation/crewai/utils.py +159 -56
- openlit/instrumentation/dynamiq/__init__.py +46 -12
- openlit/instrumentation/dynamiq/dynamiq.py +74 -33
- openlit/instrumentation/elevenlabs/__init__.py +23 -4
- openlit/instrumentation/elevenlabs/async_elevenlabs.py +16 -4
- openlit/instrumentation/elevenlabs/elevenlabs.py +16 -4
- openlit/instrumentation/elevenlabs/utils.py +128 -25
- openlit/instrumentation/embedchain/__init__.py +11 -2
- openlit/instrumentation/embedchain/embedchain.py +68 -35
- openlit/instrumentation/firecrawl/__init__.py +24 -7
- openlit/instrumentation/firecrawl/firecrawl.py +46 -20
- openlit/instrumentation/google_ai_studio/__init__.py +45 -10
- openlit/instrumentation/google_ai_studio/async_google_ai_studio.py +67 -44
- openlit/instrumentation/google_ai_studio/google_ai_studio.py +67 -44
- openlit/instrumentation/google_ai_studio/utils.py +180 -67
- openlit/instrumentation/gpt4all/__init__.py +22 -7
- openlit/instrumentation/gpt4all/gpt4all.py +67 -29
- openlit/instrumentation/gpt4all/utils.py +285 -61
- openlit/instrumentation/gpu/__init__.py +128 -47
- openlit/instrumentation/groq/__init__.py +21 -4
- openlit/instrumentation/groq/async_groq.py +33 -21
- openlit/instrumentation/groq/groq.py +33 -21
- openlit/instrumentation/groq/utils.py +192 -55
- openlit/instrumentation/haystack/__init__.py +70 -24
- openlit/instrumentation/haystack/async_haystack.py +28 -6
- openlit/instrumentation/haystack/haystack.py +28 -6
- openlit/instrumentation/haystack/utils.py +196 -74
- openlit/instrumentation/julep/__init__.py +69 -19
- openlit/instrumentation/julep/async_julep.py +53 -27
- openlit/instrumentation/julep/julep.py +53 -28
- openlit/instrumentation/langchain/__init__.py +74 -63
- openlit/instrumentation/langchain/callback_handler.py +1100 -0
- openlit/instrumentation/langchain_community/__init__.py +13 -2
- openlit/instrumentation/langchain_community/async_langchain_community.py +23 -5
- openlit/instrumentation/langchain_community/langchain_community.py +23 -5
- openlit/instrumentation/langchain_community/utils.py +35 -9
- openlit/instrumentation/letta/__init__.py +68 -15
- openlit/instrumentation/letta/letta.py +99 -54
- openlit/instrumentation/litellm/__init__.py +43 -14
- openlit/instrumentation/litellm/async_litellm.py +51 -26
- openlit/instrumentation/litellm/litellm.py +51 -26
- openlit/instrumentation/litellm/utils.py +304 -102
- openlit/instrumentation/llamaindex/__init__.py +267 -90
- openlit/instrumentation/llamaindex/async_llamaindex.py +28 -6
- openlit/instrumentation/llamaindex/llamaindex.py +28 -6
- openlit/instrumentation/llamaindex/utils.py +204 -91
- openlit/instrumentation/mem0/__init__.py +11 -2
- openlit/instrumentation/mem0/mem0.py +50 -29
- openlit/instrumentation/milvus/__init__.py +10 -2
- openlit/instrumentation/milvus/milvus.py +31 -6
- openlit/instrumentation/milvus/utils.py +166 -67
- openlit/instrumentation/mistral/__init__.py +63 -18
- openlit/instrumentation/mistral/async_mistral.py +63 -24
- openlit/instrumentation/mistral/mistral.py +63 -24
- openlit/instrumentation/mistral/utils.py +277 -69
- openlit/instrumentation/multion/__init__.py +69 -19
- openlit/instrumentation/multion/async_multion.py +57 -26
- openlit/instrumentation/multion/multion.py +57 -26
- openlit/instrumentation/ollama/__init__.py +39 -18
- openlit/instrumentation/ollama/async_ollama.py +57 -26
- openlit/instrumentation/ollama/ollama.py +57 -26
- openlit/instrumentation/ollama/utils.py +226 -50
- openlit/instrumentation/openai/__init__.py +156 -32
- openlit/instrumentation/openai/async_openai.py +147 -67
- openlit/instrumentation/openai/openai.py +150 -67
- openlit/instrumentation/openai/utils.py +657 -185
- openlit/instrumentation/openai_agents/__init__.py +5 -1
- openlit/instrumentation/openai_agents/processor.py +110 -90
- openlit/instrumentation/phidata/__init__.py +13 -5
- openlit/instrumentation/phidata/phidata.py +67 -32
- openlit/instrumentation/pinecone/__init__.py +48 -9
- openlit/instrumentation/pinecone/async_pinecone.py +27 -5
- openlit/instrumentation/pinecone/pinecone.py +27 -5
- openlit/instrumentation/pinecone/utils.py +153 -47
- openlit/instrumentation/premai/__init__.py +22 -7
- openlit/instrumentation/premai/premai.py +51 -26
- openlit/instrumentation/premai/utils.py +246 -59
- openlit/instrumentation/pydantic_ai/__init__.py +49 -22
- openlit/instrumentation/pydantic_ai/pydantic_ai.py +69 -16
- openlit/instrumentation/pydantic_ai/utils.py +89 -24
- openlit/instrumentation/qdrant/__init__.py +19 -4
- openlit/instrumentation/qdrant/async_qdrant.py +33 -7
- openlit/instrumentation/qdrant/qdrant.py +33 -7
- openlit/instrumentation/qdrant/utils.py +228 -93
- openlit/instrumentation/reka/__init__.py +23 -10
- openlit/instrumentation/reka/async_reka.py +17 -11
- openlit/instrumentation/reka/reka.py +17 -11
- openlit/instrumentation/reka/utils.py +138 -36
- openlit/instrumentation/together/__init__.py +44 -12
- openlit/instrumentation/together/async_together.py +50 -27
- openlit/instrumentation/together/together.py +50 -27
- openlit/instrumentation/together/utils.py +301 -71
- openlit/instrumentation/transformers/__init__.py +2 -1
- openlit/instrumentation/transformers/transformers.py +13 -3
- openlit/instrumentation/transformers/utils.py +139 -36
- openlit/instrumentation/vertexai/__init__.py +81 -16
- openlit/instrumentation/vertexai/async_vertexai.py +33 -15
- openlit/instrumentation/vertexai/utils.py +123 -27
- openlit/instrumentation/vertexai/vertexai.py +33 -15
- openlit/instrumentation/vllm/__init__.py +12 -5
- openlit/instrumentation/vllm/utils.py +121 -31
- openlit/instrumentation/vllm/vllm.py +16 -10
- openlit/otel/events.py +35 -10
- openlit/otel/metrics.py +32 -24
- openlit/otel/tracing.py +24 -9
- openlit/semcov/__init__.py +72 -6
- {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/METADATA +2 -1
- openlit-1.34.31.dist-info/RECORD +166 -0
- openlit/instrumentation/langchain/async_langchain.py +0 -102
- openlit/instrumentation/langchain/langchain.py +0 -102
- openlit/instrumentation/langchain/utils.py +0 -252
- openlit-1.34.30.dist-info/RECORD +0 -168
- {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/LICENSE +0 -0
- {openlit-1.34.30.dist-info → openlit-1.34.31.dist-info}/WHEEL +0 -0
openlit/instrumentation/vertexai/utils.py CHANGED

@@ -1,6 +1,7 @@
 """
 VertexAI OpenTelemetry instrumentation utility functions
 """
+
 import time
 
 from opentelemetry.trace import Status, StatusCode
@@ -14,6 +15,7 @@ from openlit.__helpers import (
 )
 from openlit.semcov import SemanticConvention
 
+
 def format_content(contents):
     """
     Format the VertexAI contents into a string for span events.
@@ -37,7 +39,9 @@ def format_content(contents):
             if part.thought:
                 content_str.append(f"thought: {part.thought}")
             if part.code_execution_result:
-                content_str.append(f"code_execution_result: {part.code_execution_result}")
+                content_str.append(
+                    f"code_execution_result: {part.code_execution_result}"
+                )
             if part.executable_code:
                 content_str.append(f"executable_code: {part.executable_code}")
             if part.file_data:
@@ -53,6 +57,7 @@ def format_content(contents):
 
     return "\n".join(formatted_messages)
 
+
 def process_chunk(scope, chunk):
     """
     Process a chunk of response data and update state.
@@ -71,8 +76,18 @@ def process_chunk(scope, chunk):
         scope._input_tokens = chunk.usage_metadata.prompt_token_count
         scope._output_tokens = chunk.usage_metadata.candidates_token_count
 
-def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
-    capture_message_content, disable_metrics, version, is_stream):
+
+def common_chat_logic(
+    scope,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content,
+    disable_metrics,
+    version,
+    is_stream,
+):
     """
     Process chat request and generate Telemetry
     """
@@ -86,13 +101,26 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     formatted_messages = format_content(contents)
     prompt = formatted_messages or str(scope._args[0][0])
 
-    cost = get_chat_model_cost(scope._request_model, pricing_info, scope._input_tokens, scope._output_tokens)
+    cost = get_chat_model_cost(
+        scope._request_model, pricing_info, scope._input_tokens, scope._output_tokens
+    )
 
     # Common Span Attributes
-    common_span_attributes(scope,
-        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VERTEXAI,
-        scope._server_address, scope._server_port, scope._request_model, scope._request_model,
-        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+    common_span_attributes(
+        scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+        SemanticConvention.GEN_AI_SYSTEM_VERTEXAI,
+        scope._server_address,
+        scope._server_port,
+        scope._request_model,
+        scope._request_model,
+        environment,
+        application_name,
+        is_stream,
+        scope._tbt,
+        scope._ttft,
+        version,
+    )
 
     # Span Attributes for Request parameters (VertexAI-specific)
     inference_config = scope._kwargs.get("generation_config", {})
@@ -115,18 +143,30 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
             scope._span.set_attribute(attribute, value)
 
     # Span Attributes for Response parameters
-    scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text" if isinstance(scope._llmresponse, str) else "json")
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_OUTPUT_TYPE,
+        "text" if isinstance(scope._llmresponse, str) else "json",
+    )
 
     # Span Attributes for Cost and Tokens
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, scope._input_tokens + scope._output_tokens)
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, scope._input_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, scope._output_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
+        scope._input_tokens + scope._output_tokens,
+    )
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
 
     # Span Attributes for Content
     if capture_message_content:
         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
-        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse
+        )
 
     # To be removed once the change to span_attributes (from span events) is complete
     scope._span.add_event(
@@ -146,23 +186,69 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
 
     # Record metrics
     if not disable_metrics:
-        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VERTEXAI,
-            scope._server_address, scope._server_port, scope._request_model, scope._request_model, environment,
-            application_name, scope._start_time, scope._end_time, scope._input_tokens, scope._output_tokens,
-            cost, scope._tbt, scope._ttft)
+        record_completion_metrics(
+            metrics,
+            SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            SemanticConvention.GEN_AI_SYSTEM_VERTEXAI,
+            scope._server_address,
+            scope._server_port,
+            scope._request_model,
+            scope._request_model,
+            environment,
+            application_name,
+            scope._start_time,
+            scope._end_time,
+            scope._input_tokens,
+            scope._output_tokens,
+            cost,
+            scope._tbt,
+            scope._ttft,
+        )
+
 
-def process_streaming_chat_response(scope, pricing_info, environment, application_name, metrics,
-    capture_message_content=False, disable_metrics=False, version=""):
+def process_streaming_chat_response(
+    scope,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="",
+):
     """
     Process streaming chat response and generate telemetry.
     """
 
-    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
-        capture_message_content, disable_metrics, version, is_stream=True)
-
-def process_chat_response(response, request_model, pricing_info, server_port, server_address,
-    environment, application_name, metrics, start_time, span, capture_message_content=False,
-    disable_metrics=False, version="1.0.0", **kwargs):
+    common_chat_logic(
+        scope,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+        is_stream=True,
+    )
+
+
+def process_chat_response(
+    response,
+    request_model,
+    pricing_info,
+    server_port,
+    server_address,
+    environment,
+    application_name,
+    metrics,
+    start_time,
+    span,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="1.0.0",
+    **kwargs,
+):
     """
     Process non-streaming chat response and generate telemetry.
     """
@@ -182,11 +268,21 @@ def process_chat_response(response, request_model, pricing_info, server_port, se
     scope._kwargs = kwargs
     scope._args = [kwargs.get("contents", [])]
 
-    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
-        capture_message_content, disable_metrics, version, is_stream=False)
+    common_chat_logic(
+        scope,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+        is_stream=False,
+    )
 
     return response
 
+
 def extract_vertexai_details(instance):
     """
     Extract VertexAI-specific details like location and model name.
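The hunks above are a pure re-wrap: `common_chat_logic` still records the same request, token, cost, and content attributes on the span. For orientation, here is a minimal sketch of that recording pattern written against the plain OpenTelemetry API; the literal attribute strings are assumptions standing in for openlit's `SemanticConvention` constants, and the function name is hypothetical.

```python
# Minimal sketch of the span-attribute pattern behind common_chat_logic.
# Attribute names are illustrative stand-ins for SemanticConvention values.
from opentelemetry import trace

tracer = trace.get_tracer("openlit-sketch")

def record_chat_span(request_model, input_tokens, output_tokens, cost, completion):
    with tracer.start_as_current_span(f"chat {request_model}") as span:
        span.set_attribute("gen_ai.request.model", request_model)
        span.set_attribute("gen_ai.usage.input_tokens", input_tokens)
        span.set_attribute("gen_ai.usage.output_tokens", output_tokens)
        # Total usage and cost, mirroring GEN_AI_CLIENT_TOKEN_USAGE and
        # GEN_AI_USAGE_COST in the diff above.
        span.set_attribute("gen_ai.client.token.usage", input_tokens + output_tokens)
        span.set_attribute("gen_ai.usage.cost", cost)
        span.set_attribute("gen_ai.completion", completion)

record_chat_span("gemini-1.5-pro", 42, 128, 0.0003, "Hello!")
```

Without an SDK configured this runs against the no-op tracer, which is enough to see the shape of the calls.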
openlit/instrumentation/vertexai/vertexai.py CHANGED

@@ -19,8 +19,17 @@ from openlit.semcov import SemanticConvention
 # Initialize logger for logging potential issues and operations
 logger = logging.getLogger(__name__)
 
-def send_message(version, environment, application_name, tracer,
-    pricing_info, capture_message_content, metrics, disable_metrics):
+
+def send_message(
+    version,
+    environment,
+    application_name,
+    tracer,
+    pricing_info,
+    capture_message_content,
+    metrics,
+    disable_metrics,
+):
     """
     Generates a telemetry wrapper for VertexAI messages to collect metrics.
     """
@@ -31,16 +40,16 @@ def send_message(version, environment, application_name, tracer,
         """
 
         def __init__(
-                self,
-                wrapped,
-                span,
-                span_name,
-                kwargs,
-                server_address,
-                server_port,
-                request_model,
-                args,
-            ):
+            self,
+            wrapped,
+            span,
+            span_name,
+            kwargs,
+            server_address,
+            server_port,
+            request_model,
+            args,
+        ):
             self.__wrapped__ = wrapped
             self._span = span
             self._span_name = span_name
@@ -88,7 +97,7 @@ def send_message(version, environment, application_name, tracer,
                         metrics=metrics,
                         capture_message_content=capture_message_content,
                         disable_metrics=disable_metrics,
-                        version=version
+                        version=version,
                     )
                 except Exception as e:
                     handle_exception(self._span, e)
@@ -108,7 +117,16 @@ def send_message(version, environment, application_name, tracer,
         awaited_wrapped = wrapped(*args, **kwargs)
         span = tracer.start_span(span_name, kind=SpanKind.CLIENT)
 
-        return TracedSyncStream(awaited_wrapped, span, span_name, kwargs, server_address, server_port, request_model, args)
+        return TracedSyncStream(
+            awaited_wrapped,
+            span,
+            span_name,
+            kwargs,
+            server_address,
+            server_port,
+            request_model,
+            args,
+        )
 
     else:
         with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
@@ -130,7 +148,7 @@ def send_message(version, environment, application_name, tracer,
                     capture_message_content=capture_message_content,
                     disable_metrics=disable_metrics,
                     version=version,
-                    **kwargs
+                    **kwargs,
                 )
 
             except Exception as e:
openlit/instrumentation/vllm/__init__.py CHANGED

@@ -5,12 +5,11 @@ import importlib.metadata
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from wrapt import wrap_function_wrapper
 
-from openlit.instrumentation.vllm.vllm import (
-    generate
-)
+from openlit.instrumentation.vllm.vllm import generate
 
 _instruments = ("vllm >= 0.5.4",)
 
+
 class VLLMInstrumentor(BaseInstrumentor):
     """
     An instrumentor for vLLM client library.
@@ -33,8 +32,16 @@ class VLLMInstrumentor(BaseInstrumentor):
         wrap_function_wrapper(
             "vllm.entrypoints.llm",
             "LLM.generate",
-            generate(version, environment, application_name, tracer,
-                pricing_info, capture_message_content, metrics, disable_metrics),
+            generate(
+                version,
+                environment,
+                application_name,
+                tracer,
+                pricing_info,
+                capture_message_content,
+                metrics,
+                disable_metrics,
+            ),
         )
 
     def _uninstrument(self, **kwargs):
openlit/instrumentation/vllm/utils.py CHANGED

@@ -1,6 +1,7 @@
 """
 vLLM OpenTelemetry instrumentation utility functions
 """
+
 import time
 
 from opentelemetry.trace import Status, StatusCode
@@ -13,17 +14,19 @@ from openlit.__helpers import (
 )
 from openlit.semcov import SemanticConvention
 
+
 def get_inference_config(args, kwargs):
     """
     Safely extract inference configuration from args or kwargs.
     """
 
-    if 'sampling_params' in kwargs:
-        return kwargs['sampling_params']
+    if "sampling_params" in kwargs:
+        return kwargs["sampling_params"]
     if len(args) > 1:
         return args[1]
     return None
 
+
 def format_content(prompts):
     """
     Process a list of prompts to extract content.
@@ -36,8 +39,18 @@ def format_content(prompts):
     else:
         return str(prompts)
 
-def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
-    capture_message_content, disable_metrics, version, is_stream):
+
+def common_chat_logic(
+    scope,
+    pricing_info,
+    environment,
+    application_name,
+    metrics,
+    capture_message_content,
+    disable_metrics,
+    version,
+    is_stream,
+):
     """
     Process chat request and generate Telemetry
     """
@@ -60,37 +73,75 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
     cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
 
     # Common Span Attributes
-    common_span_attributes(scope,
-        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
-        scope._server_address, scope._server_port, request_model, request_model,
-        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+    common_span_attributes(
+        scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+        SemanticConvention.GEN_AI_SYSTEM_VLLM,
+        scope._server_address,
+        scope._server_port,
+        request_model,
+        request_model,
+        environment,
+        application_name,
+        is_stream,
+        scope._tbt,
+        scope._ttft,
+        version,
+    )
 
     # Span Attributes for Request parameters
     inference_config = get_inference_config(scope._args, scope._kwargs)
     if inference_config:
-        scope._span.set_attribute(
-            SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
-            getattr(inference_config, 'max_tokens', -1),
-        )
-        scope._span.set_attribute(
-            SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
-            getattr(inference_config, 'temperature', 1.0),
-        )
-        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, getattr(inference_config, 'top_p', 1.0))
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS,
+            getattr(inference_config, "max_tokens", -1),
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES,
+            getattr(inference_config, "stop_sequences", []),
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_REQUEST_TEMPERATURE,
+            getattr(inference_config, "temperature", 1.0),
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_REQUEST_TOP_P,
+            getattr(inference_config, "top_p", 1.0),
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_REQUEST_TOP_K,
+            getattr(inference_config, "top_k", -1),
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
+            getattr(inference_config, "presence_penalty", 0.0),
+        )
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+            getattr(inference_config, "frequency_penalty", 0.0),
+        )
 
     # Span Attributes for Response parameters
     scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")
 
     # Span Attributes for Cost and Tokens
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens
+    )
+    scope._span.set_attribute(
+        SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens
+    )
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
 
     # Span Attributes for Content
     if capture_message_content:
         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
-        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, completion)
+        scope._span.set_attribute(
+            SemanticConvention.GEN_AI_CONTENT_COMPLETION, completion
+        )
 
     # To be removed once the change to span_attributes (from span events) is complete
     scope._span.add_event(
@@ -110,14 +161,44 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metric
 
     # Metrics
     if not disable_metrics:
-        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
-            scope._server_address, scope._server_port, request_model, request_model, environment,
-            application_name, scope._start_time, scope._end_time, input_tokens, output_tokens,
-            cost, scope._tbt, scope._ttft)
-
-def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
-    environment, application_name, metrics, start_time, span, args, kwargs,
-    capture_message_content=False, disable_metrics=False, version="1.0.0"):
+        record_completion_metrics(
+            metrics,
+            SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            SemanticConvention.GEN_AI_SYSTEM_VLLM,
+            scope._server_address,
+            scope._server_port,
+            request_model,
+            request_model,
+            environment,
+            application_name,
+            scope._start_time,
+            scope._end_time,
+            input_tokens,
+            output_tokens,
+            cost,
+            scope._tbt,
+            scope._ttft,
+        )
+
+
+def process_chat_response(
+    instance,
+    response,
+    request_model,
+    pricing_info,
+    server_port,
+    server_address,
+    environment,
+    application_name,
+    metrics,
+    start_time,
+    span,
+    args,
+    kwargs,
+    capture_message_content=False,
+    disable_metrics=False,
+    version="1.0.0",
+):
     """
     Process chat request and generate Telemetry
    """
@@ -137,7 +218,16 @@ def process_chat_response(instance, response, request_model, pricing_info, serve
     scope._args = args
     scope._kwargs = kwargs
 
-    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
-        capture_message_content, disable_metrics, version, is_stream=False)
+    common_chat_logic(
+        scope,
+        pricing_info,
+        environment,
+        application_name,
+        metrics,
+        capture_message_content,
+        disable_metrics,
+        version,
+        is_stream=False,
+    )
 
     return response
openlit/instrumentation/vllm/vllm.py CHANGED

@@ -4,17 +4,21 @@ Module for monitoring vLLM API calls.
 
 import time
 from opentelemetry.trace import SpanKind
-from openlit.__helpers import (
-    handle_exception,
-    set_server_address_and_port
-)
-from openlit.instrumentation.vllm.utils import (
-    process_chat_response
-)
+from openlit.__helpers import handle_exception, set_server_address_and_port
+from openlit.instrumentation.vllm.utils import process_chat_response
 from openlit.semcov import SemanticConvention
 
-def generate(version, environment, application_name, tracer, pricing_info,
-    capture_message_content, metrics, disable_metrics):
+
+def generate(
+    version,
+    environment,
+    application_name,
+    tracer,
+    pricing_info,
+    capture_message_content,
+    metrics,
+    disable_metrics,
+):
     """
     Generates a telemetry wrapper for GenAI function call
     """
@@ -23,7 +27,9 @@ def generate(version, environment, application_name, tracer, pricing_info,
         """
         Wraps the GenAI function call.
         """
-        server_address, server_port = set_server_address_and_port(instance, "http://127.0.0.1", 443)
+        server_address, server_port = set_server_address_and_port(
+            instance, "http://127.0.0.1", 443
+        )
         request_model = instance.llm_engine.model_config.model or "facebook/opt-125m"
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
openlit/otel/events.py CHANGED

@@ -4,10 +4,17 @@ Setups up OpenTelemetry events emitter
 
 import os
 from opentelemetry import _events, _logs
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
+from opentelemetry.sdk.resources import (
+    SERVICE_NAME,
+    TELEMETRY_SDK_NAME,
+    DEPLOYMENT_ENVIRONMENT,
+)
 from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk._events import EventLoggerProvider
-from opentelemetry.sdk._logs.export import BatchLogRecordProcessor, SimpleLogRecordProcessor
+from opentelemetry.sdk._logs.export import (
+    BatchLogRecordProcessor,
+    SimpleLogRecordProcessor,
+)
 from opentelemetry.sdk._logs import LoggerProvider
 from opentelemetry.sdk._logs.export import ConsoleLogExporter
 
@@ -19,7 +26,15 @@ else:
 # Global flag to check if the events provider initialization is complete.
 EVENTS_SET = False
 
-def setup_events(application_name, environment, event_logger, otlp_endpoint, otlp_headers, disable_batch):
+
+def setup_events(
+    application_name,
+    environment,
+    event_logger,
+    otlp_endpoint,
+    otlp_headers,
+    disable_batch,
+):
     """Setup OpenTelemetry events with the given configuration.
 
     Args:
@@ -42,10 +57,12 @@ def setup_events(application_name, environment, event_logger, otlp_endpoint, otl
     try:
         if not EVENTS_SET:
             # Create resource with service and environment information
-            resource = Resource.create(attributes={
-                SERVICE_NAME: application_name,
-                DEPLOYMENT_ENVIRONMENT: environment,
-                TELEMETRY_SDK_NAME: "openlit"}
+            resource = Resource.create(
+                attributes={
+                    SERVICE_NAME: application_name,
+                    DEPLOYMENT_ENVIRONMENT: environment,
+                    TELEMETRY_SDK_NAME: "openlit",
+                }
             )
 
             # Initialize the LoggerProvider with the created resource.
@@ -57,7 +74,9 @@ def setup_events(application_name, environment, event_logger, otlp_endpoint, otl
 
         if otlp_headers is not None:
             if isinstance(otlp_headers, dict):
-                headers_str = ','.join(f"{key}={value}" for key, value in otlp_headers.items())
+                headers_str = ",".join(
+                    f"{key}={value}" for key, value in otlp_headers.items()
+                )
             else:
                 headers_str = otlp_headers
 
@@ -67,10 +86,16 @@ def setup_events(application_name, environment, event_logger, otlp_endpoint, otl
             if os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT"):
                 event_exporter = OTLPLogExporter()
                 # pylint: disable=line-too-long
-                logger_provider.add_log_record_processor(SimpleLogRecordProcessor(event_exporter)) if disable_batch else logger_provider.add_log_record_processor(BatchLogRecordProcessor(event_exporter))
+                logger_provider.add_log_record_processor(
+                    SimpleLogRecordProcessor(event_exporter)
+                ) if disable_batch else logger_provider.add_log_record_processor(
+                    BatchLogRecordProcessor(event_exporter)
+                )
             else:
                 event_exporter = ConsoleLogExporter()
-                logger_provider.add_log_record_processor(SimpleLogRecordProcessor(event_exporter))
+                logger_provider.add_log_record_processor(
+                    SimpleLogRecordProcessor(event_exporter)
+                )
 
             _logs.set_logger_provider(logger_provider)
             event_provider = EventLoggerProvider()