openlit 1.34.12__py3-none-any.whl → 1.34.13__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- openlit/instrumentation/vllm/__init__.py +5 -7
- openlit/instrumentation/vllm/utils.py +85 -103
- openlit/instrumentation/vllm/vllm.py +3 -8
- {openlit-1.34.12.dist-info → openlit-1.34.13.dist-info}/METADATA +1 -1
- {openlit-1.34.12.dist-info → openlit-1.34.13.dist-info}/RECORD +7 -7
- {openlit-1.34.12.dist-info → openlit-1.34.13.dist-info}/LICENSE +0 -0
- {openlit-1.34.12.dist-info → openlit-1.34.13.dist-info}/WHEEL +0 -0
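The keyword arguments read by `_instrument` in the first file below (application_name, environment, tracer, metrics_dict, pricing_info, capture_message_content, disable_metrics) are supplied when openlit is initialized. A minimal sketch of the instrumented call path, assuming openlit's public `openlit.init()` entry point; keyword names beyond application_name and environment are not shown in this diff, and the model name is simply the fallback value used in vllm.py below:

    # Sketch: enable openlit, then call the patched vLLM entry point.
    # openlit.init() keyword names beyond application_name/environment are assumptions.
    import openlit
    from vllm import LLM

    openlit.init(application_name="default", environment="default")

    llm = LLM(model="facebook/opt-125m")        # same model used as fallback in vllm.py below
    outputs = llm.generate(["Tell me a joke"])  # LLM.generate is the method being wrapped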
openlit/instrumentation/vllm/__init__.py

@@ -1,4 +1,3 @@
-# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
 """Initializer of Auto Instrumentation of vLLM Functions"""
 
 from typing import Collection
@@ -14,15 +13,15 @@ _instruments = ("vllm >= 0.5.4",)
 
 class VLLMInstrumentor(BaseInstrumentor):
     """
-    An instrumentor for vLLM
+    An instrumentor for vLLM client library.
     """
 
     def instrumentation_dependencies(self) -> Collection[str]:
         return _instruments
 
     def _instrument(self, **kwargs):
-        application_name = kwargs.get("application_name", "…
-        environment = kwargs.get("environment", "…
+        application_name = kwargs.get("application_name", "default")
+        environment = kwargs.get("environment", "default")
         tracer = kwargs.get("tracer")
         metrics = kwargs.get("metrics_dict")
         pricing_info = kwargs.get("pricing_info", {})
@@ -30,14 +29,13 @@ class VLLMInstrumentor(BaseInstrumentor):
         disable_metrics = kwargs.get("disable_metrics")
         version = importlib.metadata.version("vllm")
 
-        # …
+        # Chat completions
         wrap_function_wrapper(
             "vllm.entrypoints.llm",
             "LLM.generate",
             generate(version, environment, application_name,
-                …
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
     def _uninstrument(self, **kwargs):
-        # Proper uninstrumentation logic to revert patched methods
         pass
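`wrap_function_wrapper` comes from the wrapt library: it replaces `LLM.generate` on `vllm.entrypoints.llm` with a callable produced by the `generate(...)` factory. A rough sketch of that factory pattern, assuming wrapt's standard `(wrapped, instance, args, kwargs)` wrapper signature; the real wrapper body lives in vllm.py further down:

    # Sketch of the wrapt wrapper-factory pattern used by _instrument above.
    from wrapt import wrap_function_wrapper

    def generate(version, environment, application_name, tracer, pricing_info,
                 capture_message_content, metrics, disable_metrics):
        def wrapper(wrapped, instance, args, kwargs):
            # wrapped  -> the original vllm.LLM.generate
            # instance -> the LLM object it was called on
            response = wrapped(*args, **kwargs)
            # ...spans and metrics are recorded here before returning...
            return response
        return wrapper

    # wrap_function_wrapper("vllm.entrypoints.llm", "LLM.generate", generate(...))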
openlit/instrumentation/vllm/utils.py

@@ -1,15 +1,15 @@
 """
-…
+vLLM OpenTelemetry instrumentation utility functions
 """
-…
 import time
-
+
 from opentelemetry.trace import Status, StatusCode
+
 from openlit.__helpers import (
-    calculate_tbt,
-    get_chat_model_cost,
     general_tokens,
-    …
+    get_chat_model_cost,
+    common_span_attributes,
+    record_completion_metrics,
 )
 from openlit.semcov import SemanticConvention
 
@@ -24,77 +24,81 @@ def get_inference_config(args, kwargs):
         return args[1]
     return None
 
+def format_content(prompts):
+    """
+    Process a list of prompts to extract content.
+    """
+
+    if isinstance(prompts, str):
+        return prompts
+    elif isinstance(prompts, list):
+        return "\n".join(str(prompt) for prompt in prompts)
+    else:
+        return str(prompts)
+
 def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
                       capture_message_content, disable_metrics, version, is_stream):
     """
     Process chat request and generate Telemetry
     """
 
-    … (13 removed lines truncated in the source diff)
+    request_model = scope._request_model
+
+    # Extract prompts and completions from vLLM response
+    input_tokens = 0
+    output_tokens = 0
+    prompt = ""
+    completion = ""
+
+    for output in scope._response:
+        prompt += output.prompt + "\n"
+        if output.outputs and len(output.outputs) > 0:
+            completion += output.outputs[0].text + "\n"
+        input_tokens += general_tokens(output.prompt)
+        output_tokens += general_tokens(output.outputs[0].text)
+
+    cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
+        scope._server_address, scope._server_port, request_model, request_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+    # Span Attributes for Request parameters
     inference_config = get_inference_config(scope._args, scope._kwargs)
     if inference_config:
-        … (removed attribute-mapping block truncated in the source diff)
-            value = getattr(inference_config, key, None)
-            if value is not None:
-                scope._span.set_attribute(attribute, value)
-
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._request_model)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, getattr(inference_config, 'max_tokens', -1))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, getattr(inference_config, 'stop_sequences', []))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, getattr(inference_config, 'temperature', 1.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, getattr(inference_config, 'top_p', 1.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K, getattr(inference_config, 'top_k', -1))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
+            getattr(inference_config, 'presence_penalty', 0.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+            getattr(inference_config, 'frequency_penalty', 0.0))
+
+    # Span Attributes for Response parameters
     scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")
 
-    # …
-    scope._span.set_attribute(…
-    scope._span.set_attribute(…
-    scope._span.set_attribute(SemanticConvention.…
-    scope._span.set_attribute(SemanticConvention.…
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
-
-    input_tokens = 0
-    output_tokens = 0
-    cost = 0
+    # Span Attributes for Cost and Tokens
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
 
+    # Span Attributes for Content
     if capture_message_content:
-        prompt …
-        completion …
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, completion)
 
-        …
-            prompt += output.prompt + "\n"
-            if output.outputs and len(output.outputs) > 0:
-                completion += output.outputs[0].text + "\n"
-            input_tokens += general_tokens(output.prompt)
-            output_tokens += general_tokens(output.outputs[0].text)
-
-        # Add a single event for prompt
+        # To be removed once the change to span_attributes (from span events) is complete
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
             attributes={
                 SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
             },
         )
-
-        # Add a single event for completion
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
             attributes={
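The new aggregation loop above walks the list of vLLM RequestOutput objects, concatenates prompts and first-choice completions, and estimates token counts with `general_tokens`. A standalone sketch of the same logic, using stand-ins for the vLLM output objects and a crude estimator in place of openlit's helper:

    # Standalone sketch of the prompt/completion/token aggregation above.
    from dataclasses import dataclass

    @dataclass
    class FakeCompletion:
        text: str

    @dataclass
    class FakeRequestOutput:
        prompt: str
        outputs: list

    def rough_tokens(text: str) -> int:
        # crude stand-in for general_tokens(): ~4 characters per token
        return max(1, len(text) // 4)

    response = [
        FakeRequestOutput(prompt="Tell me a joke", outputs=[FakeCompletion(text="Why did ...")]),
        FakeRequestOutput(prompt="Summarise vLLM", outputs=[FakeCompletion(text="A fast ...")]),
    ]

    prompt, completion, input_tokens, output_tokens = "", "", 0, 0
    for output in response:
        prompt += output.prompt + "\n"
        if output.outputs:
            completion += output.outputs[0].text + "\n"
        input_tokens += rough_tokens(output.prompt)
        output_tokens += rough_tokens(output.outputs[0].text)

    print(input_tokens, output_tokens)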
@@ -102,39 +106,14 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
             },
         )
 
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-        input_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-        output_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
-        input_tokens + output_tokens)
-
-    # Calculate cost of the operation
-    cost = get_chat_model_cost(scope._request_model, pricing_info, input_tokens, output_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
-
     scope._span.set_status(Status(StatusCode.OK))
 
-    … (removed metrics-attribute setup truncated in the source diff)
-            request_model=scope._request_model,
-            server_address=scope._server_address,
-            server_port=scope._server_port,
-            response_model=scope._request_model,
-        )
-        metrics['genai_client_operation_duration'].record(scope._end_time - scope._start_time, metrics_attributes)
-        metrics['genai_server_tbt'].record(scope._tbt, metrics_attributes)
-        metrics['genai_server_ttft'].record(scope._ttft, metrics_attributes)
-        metrics['genai_requests'].add(1, metrics_attributes)
-        metrics['genai_completion_tokens'].add(output_tokens, metrics_attributes)
-        metrics['genai_prompt_tokens'].add(input_tokens, metrics_attributes)
-        metrics['genai_cost'].record(cost, metrics_attributes)
-        metrics['genai_client_usage_tokens'].record(
-            input_tokens + output_tokens, metrics_attributes)
+    # Metrics
+    if not disable_metrics:
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
+            scope._server_address, scope._server_port, request_model, request_model, environment,
+            application_name, scope._start_time, scope._end_time, input_tokens, output_tokens,
+            cost, scope._tbt, scope._ttft)
 
 def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
                           environment, application_name, metrics, start_time, span, args, kwargs,
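The hand-rolled metric recording above is collapsed into a single `record_completion_metrics` call. Based only on the metric names visible in the removed lines, a helper of that shape plausibly covers the same instruments as follows (a sketch, not openlit's actual implementation):

    # Sketch only: approximates what the removed per-instrument code did,
    # based on the metric names visible in the old lines above.
    def record_completion_metrics_sketch(metrics, attributes, start_time, end_time,
                                         input_tokens, output_tokens, cost, tbt, ttft):
        metrics['genai_client_operation_duration'].record(end_time - start_time, attributes)
        metrics['genai_server_tbt'].record(tbt, attributes)
        metrics['genai_server_ttft'].record(ttft, attributes)
        metrics['genai_requests'].add(1, attributes)
        metrics['genai_prompt_tokens'].add(input_tokens, attributes)
        metrics['genai_completion_tokens'].add(output_tokens, attributes)
        metrics['genai_client_usage_tokens'].record(input_tokens + output_tokens, attributes)
        metrics['genai_cost'].record(cost, attributes)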
@@ -142,20 +121,23 @@ def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
     """
     Process chat request and generate Telemetry
     """
-    … (removed scope-setup lines truncated in the source diff)
+
+    # Create scope object
+    scope = type("GenericScope", (), {})()
+
+    scope._response = response
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+    scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+    scope._server_address = server_address
+    scope._server_port = server_port
+    scope._request_model = request_model
+    scope._timestamps = []
+    scope._args = args
+    scope._kwargs = kwargs
+
+    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
                       capture_message_content, disable_metrics, version, is_stream=False)
 
     return response
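`type("GenericScope", (), {})()` simply builds a throwaway object that accepts arbitrary attributes, so the response, timings, and request metadata can be handed to `common_chat_logic` as one bundle. An equivalent, slightly more explicit form uses `types.SimpleNamespace`:

    # The "GenericScope" trick creates an empty attribute bag; SimpleNamespace is equivalent.
    import time
    from types import SimpleNamespace

    scope = type("GenericScope", (), {})()   # pattern used in the diff
    scope._start_time = time.time()

    same_idea = SimpleNamespace(_start_time=time.time(), _tbt=0)
    print(scope._start_time, same_idea._tbt)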
openlit/instrumentation/vllm/vllm.py

@@ -2,7 +2,6 @@
 Module for monitoring vLLM API calls.
 """
 
-import logging
 import time
 from opentelemetry.trace import SpanKind
 from openlit.__helpers import (
@@ -14,11 +13,8 @@ from openlit.instrumentation.vllm.utils import (
 )
 from openlit.semcov import SemanticConvention
 
-… (removed logger setup truncated in the source diff)
-def generate(version, environment, application_name,
-             tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+def generate(version, environment, application_name, tracer, pricing_info,
+             capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for GenAI function call
     """
@@ -27,7 +23,6 @@ def generate(version, environment, application_name,
     """
     Wraps the GenAI function call.
     """
-
     server_address, server_port = set_server_address_and_port(instance, "http://127.0.0.1", 443)
     request_model = instance.llm_engine.model_config.model or "facebook/opt-125m"
 
@@ -56,9 +51,9 @@
             disable_metrics=disable_metrics,
             version=version,
         )
+
     except Exception as e:
         handle_exception(span, e)
-        logger.error("Error in trace creation: %s", e)
 
     return response
 
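With `import logging` and the `logger.error` call removed, error reporting now goes through `handle_exception(span, e)` alone. That helper is not shown in this diff; the usual OpenTelemetry pattern it presumably follows looks like this (an assumption, sketched with the standard span APIs):

    # Generic OpenTelemetry error-handling pattern; openlit's handle_exception()
    # is not shown in this diff, so its exact body is an assumption.
    from opentelemetry.trace import Status, StatusCode

    def handle_exception_sketch(span, error: Exception):
        span.record_exception(error)                        # standard OTel span API
        span.set_status(Status(StatusCode.ERROR, str(error)))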
{openlit-1.34.12.dist-info → openlit-1.34.13.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openlit
-Version: 1.34.12
+Version: 1.34.13
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
 License: Apache-2.0
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu
{openlit-1.34.12.dist-info → openlit-1.34.13.dist-info}/RECORD

@@ -131,14 +131,14 @@ openlit/instrumentation/transformers/utils.py,sha256=3f-ewpUpduaBrTVIFJKaabACjz-…
 openlit/instrumentation/vertexai/__init__.py,sha256=mT28WCBvQfRCkAWGL6bd0EjEPHvMjaNcz6T3jsLZh8k,3745
 openlit/instrumentation/vertexai/async_vertexai.py,sha256=-kpg-eiL76O5_XopUPghCYwJHf0Nrxi00_Z5tCwq6zM,23086
 openlit/instrumentation/vertexai/vertexai.py,sha256=5NB090aWlm9DnlccNNLRO6A97P_RN-JnHb5JS01tYyw,23000
-openlit/instrumentation/vllm/__init__.py,sha256=…
-openlit/instrumentation/vllm/utils.py,sha256=…
-openlit/instrumentation/vllm/vllm.py,sha256=…
+openlit/instrumentation/vllm/__init__.py,sha256=uaSzQmgDuKJ-sh61sfVdzVt2qAZaozZIQ8sbmQ0XpZE,1357
+openlit/instrumentation/vllm/utils.py,sha256=HuCPNBgChWg9vA7DHNFCij_y8qj27DjZxdZ0Nvdt2fg,5751
+openlit/instrumentation/vllm/vllm.py,sha256=VzazF2f4LLwjZDO_G8lIN_d622oSJM0fIO9wjxXbhyg,2004
 openlit/otel/events.py,sha256=VrMjTpvnLtYRBHCiFwJojTQqqNpRCxoD4yJYeQrtPsk,3560
 openlit/otel/metrics.py,sha256=GM2PDloBGRhBTkHHkYaqmOwIAQkY124ZhW4sEqW1Fgk,7086
 openlit/otel/tracing.py,sha256=tjV2bEbEDPUB1Z46gE-UsJsb04sRdFrfbhIDkxViZc0,3103
 openlit/semcov/__init__.py,sha256=ptyo37PY-FHDx_PShEvbdns71cD4YvvXw15bCRXKCKM,13461
-openlit-1.34.12.dist-info/LICENSE,sha256=…
-openlit-1.34.12.dist-info/METADATA,sha256=…
-openlit-1.34.12.dist-info/WHEEL,sha256=…
-openlit-1.34.12.dist-info/RECORD,,
+openlit-1.34.13.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+openlit-1.34.13.dist-info/METADATA,sha256=4uHfQSKnuT-yfoNz7kj78yd53TBFDCDYVhOIsz7XF8k,23470
+openlit-1.34.13.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+openlit-1.34.13.dist-info/RECORD,,
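RECORD entries pair each file with a urlsafe-base64 SHA-256 digest (trailing '=' padding stripped) and a byte size, per the wheel format. A short sketch for checking one of the hashes above against a local file:

    # Recompute a wheel RECORD hash ("sha256=<urlsafe b64 digest, no padding>").
    import base64
    import hashlib

    def record_hash(path: str) -> str:
        with open(path, "rb") as f:
            digest = hashlib.sha256(f.read()).digest()
        return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode()

    # e.g. record_hash("openlit/instrumentation/vllm/utils.py") should match the entry above.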
{openlit-1.34.12.dist-info → openlit-1.34.13.dist-info}/LICENSE: File without changes
{openlit-1.34.12.dist-info → openlit-1.34.13.dist-info}/WHEEL: File without changes