openlit 1.34.12__py3-none-any.whl → 1.34.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openlit/instrumentation/transformers/__init__.py +12 -5
- openlit/instrumentation/transformers/transformers.py +21 -28
- openlit/instrumentation/transformers/utils.py +126 -110
- openlit/instrumentation/vllm/__init__.py +5 -7
- openlit/instrumentation/vllm/utils.py +85 -103
- openlit/instrumentation/vllm/vllm.py +3 -8
- {openlit-1.34.12.dist-info → openlit-1.34.14.dist-info}/METADATA +1 -1
- {openlit-1.34.12.dist-info → openlit-1.34.14.dist-info}/RECORD +10 -10
- {openlit-1.34.12.dist-info → openlit-1.34.14.dist-info}/LICENSE +0 -0
- {openlit-1.34.12.dist-info → openlit-1.34.14.dist-info}/WHEEL +0 -0
openlit/instrumentation/transformers/__init__.py

@@ -30,12 +30,19 @@ class TransformersInstrumentor(BaseInstrumentor):
         version = importlib.metadata.version("transformers")
 
         wrap_function_wrapper(
-            "transformers",
-            "TextGenerationPipeline.__call__",
-            pipeline_wrapper(
-                …
+            "transformers",
+            "TextGenerationPipeline.__call__",
+            pipeline_wrapper(
+                version,
+                environment,
+                application_name,
+                tracer,
+                pricing_info,
+                capture_message_content,
+                metrics,
+                disable_metrics
+            ),
         )
 
     def _uninstrument(self, **kwargs):
-        # Proper uninstrumentation logic to revert patched methods
         pass
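For context, `wrap_function_wrapper` is the `wrapt` package's monkey-patching helper, and `pipeline_wrapper(...)` returns a closure with the standard `wrapt` wrapper signature. A minimal sketch of the same wiring, with a hypothetical `trace_call` standing in for the real wrapper:

```python
from wrapt import wrap_function_wrapper

# wrapt wrappers always receive (wrapped, instance, args, kwargs);
# pipeline_wrapper() above returns a closure of exactly this shape.
def trace_call(wrapped, instance, args, kwargs):
    print(f"entering {wrapped.__name__}")
    return wrapped(*args, **kwargs)

# Patch TextGenerationPipeline.__call__ in place, as _instrument() does.
wrap_function_wrapper("transformers", "TextGenerationPipeline.__call__", trace_call)
```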
openlit/instrumentation/transformers/transformers.py

@@ -2,22 +2,15 @@
 Module for monitoring HF Transformers API calls.
 """
 
-import logging
 import time
 from opentelemetry.trace import SpanKind
-from openlit.__helpers import (
-    …
-)
-from openlit.instrumentation.transformers.utils import (
-    process_chat_response,
-)
+from openlit.__helpers import set_server_address_and_port
+from openlit.instrumentation.transformers.utils import process_chat_response
 from openlit.semcov import SemanticConvention
 
-# Initialize logger for logging potential issues and operations
-logger = logging.getLogger(__name__)
 
-def pipeline_wrapper(version, environment, application_name,
-    …
+def pipeline_wrapper(version, environment, application_name, tracer, pricing_info,
+    capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for GenAI function call
     """

@@ -32,27 +25,27 @@ def pipeline_wrapper(version, environment, application_name,
 
         span_name = f"{SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT} {request_model}"
 
-        with tracer.start_as_current_span(span_name, kind=…
+        with tracer.start_as_current_span(span_name, kind=SpanKind.CLIENT) as span:
             start_time = time.time()
             response = wrapped(*args, **kwargs)
 
             response = process_chat_response(
-                …
+                instance=instance,
+                response=response,
+                request_model=request_model,
+                pricing_info=pricing_info,
+                server_port=server_port,
+                server_address=server_address,
+                environment=environment,
+                application_name=application_name,
+                metrics=metrics,
+                start_time=start_time,
+                span=span,
+                args=args,
+                kwargs=kwargs,
+                capture_message_content=capture_message_content,
+                disable_metrics=disable_metrics,
+                version=version,
             )
 
             return response
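End to end, this wrapper fires on every instrumented text-generation call. A hedged usage sketch (the application name and model are illustrative, not from the diff):

```python
import openlit
from transformers import pipeline

# openlit.init() registers TransformersInstrumentor, so the call below is
# traced by pipeline_wrapper(): one CLIENT span per pipeline invocation,
# named f"{GEN_AI_OPERATION_TYPE_CHAT} {request_model}" as shown above.
openlit.init(application_name="demo-app", environment="dev")

pipe = pipeline("text-generation", model="gpt2")
result = pipe("The sky is", max_new_tokens=16)
```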
openlit/instrumentation/transformers/utils.py

@@ -3,19 +3,61 @@ HF Transformers OpenTelemetry instrumentation utility functions
 """
 import time
 
-from opentelemetry.sdk.resources import SERVICE_NAME, TELEMETRY_SDK_NAME, DEPLOYMENT_ENVIRONMENT
 from opentelemetry.trace import Status, StatusCode
 
 from openlit.__helpers import (
-    response_as_dict,
-    calculate_tbt,
     general_tokens,
     get_chat_model_cost,
-    …
-    …
+    common_span_attributes,
+    record_completion_metrics,
 )
 from openlit.semcov import SemanticConvention
 
+def format_content(content):
+    """
+    Format content to a consistent structure.
+    """
+    if isinstance(content, str):
+        return content
+    elif isinstance(content, list):
+        # Check if its a list of chat messages (like in the test case)
+        if (len(content) > 0 and isinstance(content[0], dict) and
+            "role" in content[0] and "content" in content[0]):
+            # Handle chat message format like Groq
+            formatted_messages = []
+            for message in content:
+                role = message["role"]
+                msg_content = message["content"]
+
+                if isinstance(msg_content, list):
+                    content_str = ", ".join(
+                        f'{item["type"]}: {item["text"] if "text" in item else item.get("image_url", str(item))}'
+                        if isinstance(item, dict) and "type" in item
+                        else str(item)
+                        for item in msg_content
+                    )
+                    formatted_messages.append(f"{role}: {content_str}")
+                else:
+                    formatted_messages.append(f"{role}: {msg_content}")
+            return "\n".join(formatted_messages)
+        else:
+            # Handle other list formats (transformers responses)
+            formatted_content = []
+            for item in content:
+                if isinstance(item, str):
+                    formatted_content.append(item)
+                elif isinstance(item, dict):
+                    # Handle dict format for transformers
+                    if "generated_text" in item:
+                        formatted_content.append(str(item["generated_text"]))
+                    else:
+                        formatted_content.append(str(item))
+                else:
+                    formatted_content.append(str(item))
+            return " ".join(formatted_content)
+    else:
+        return str(content)
+
 def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
     capture_message_content, disable_metrics, version, args, kwargs, is_stream):
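A quick sanity check of the two list branches in `format_content`, assuming it is imported from the module above; expected output is shown in comments:

```python
from openlit.instrumentation.transformers.utils import format_content

# Chat-message lists (role/content dicts) are flattened one message per line
messages = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": [{"type": "text", "text": "Hi!"}]},
]
print(format_content(messages))
# user: Hello
# assistant: text: Hi!

# Transformers-style response lists pull out "generated_text"
print(format_content([{"generated_text": "Once upon a time"}]))
# Once upon a time
```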
@@ -24,56 +66,42 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
     """
 
     scope._end_time = time.time()
-    if len(scope._timestamps) > 1:
-        scope._tbt = calculate_tbt(scope._timestamps)
-
     forward_params = scope._instance._forward_params
     request_model = scope._instance.model.config.name_or_path
 
     input_tokens = general_tokens(scope._prompt)
-    output_tokens = general_tokens(scope._llmresponse)
+    output_tokens = general_tokens(scope._completion)
 
     cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
 
-    # …
-    scope…
-    …
-        (SemanticConvention.GEN_AI_REQUEST_TOP_K, "top_k"),
-        (SemanticConvention.…
-    …
-        value = forward_params.get(key)
-        if value is not None:
-            scope._span.set_attribute(attribute, value)
-
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, request_model)
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE,
+        scope._server_address, scope._server_port, request_model, request_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+    # Set request parameters from forward_params
+    if forward_params.get("temperature") is not None:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, forward_params["temperature"])
+    if forward_params.get("top_k") is not None:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K, forward_params["top_k"])
+    if forward_params.get("top_p") is not None:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, forward_params["top_p"])
+    if forward_params.get("max_length") is not None:
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, forward_params["max_length"])
+
+    # Set token usage and cost attributes
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
-    scope._span.set_attribute(SemanticConvention.SERVER_ADDRESS, scope._server_address)
-    scope._span.set_attribute(DEPLOYMENT_ENVIRONMENT, environment)
-    scope._span.set_attribute(SERVICE_NAME, application_name)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_IS_STREAM, is_stream)
     scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
     scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TBT, scope._tbt)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
 
-    # …
+    # Span Attributes for Content
     if capture_message_content:
         scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, scope._prompt)
-        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._llmresponse)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, scope._completion)
 
+        # To be removed once the change to span_attributes (from span events) is complete
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
             attributes={

@@ -83,32 +111,18 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
             attributes={
-                SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._llmresponse,
+                SemanticConvention.GEN_AI_CONTENT_COMPLETION: scope._completion,
             },
         )
 
     scope._span.set_status(Status(StatusCode.OK))
 
+    # Record metrics using the standardized helper function
     if not disable_metrics:
-        metrics_attributes = …
-            system=SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE,
-            request_model=request_model,
-            server_address=scope._server_address,
-            server_port=scope._server_port,
-            response_model=request_model,
-        )
-
-        metrics["genai_client_usage_tokens"].record(input_tokens + output_tokens, metrics_attributes)
-        metrics["genai_client_operation_duration"].record(scope._end_time - scope._start_time, metrics_attributes)
-        metrics["genai_server_tbt"].record(scope._tbt, metrics_attributes)
-        metrics["genai_server_ttft"].record(scope._ttft, metrics_attributes)
-        metrics["genai_requests"].add(1, metrics_attributes)
-        metrics["genai_completion_tokens"].add(output_tokens, metrics_attributes)
-        metrics["genai_prompt_tokens"].add(input_tokens, metrics_attributes)
-        metrics["genai_cost"].record(cost, metrics_attributes)
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT,
+            SemanticConvention.GEN_AI_SYSTEM_HUGGING_FACE, scope._server_address, scope._server_port,
+            request_model, request_model, environment, application_name, scope._start_time, scope._end_time,
+            cost, input_tokens, output_tokens, scope._tbt, scope._ttft)
 
 def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
                           environment, application_name, metrics, start_time,
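The `scope._instance` reads above lean on `transformers.Pipeline` internals: `model.config.name_or_path` and the private `_forward_params`. A hedged illustration; the exact keys held in `_forward_params` depend on the transformers version and task:

```python
from transformers import pipeline

# Generation kwargs given at construction are sanitized into the pipeline's
# forward params, which is what common_chat_logic() inspects.
pipe = pipeline("text-generation", model="gpt2", max_length=20, temperature=0.7)

print(pipe.model.config.name_or_path)  # "gpt2"  -> request_model
print(pipe._forward_params)            # e.g. {"max_length": 20, "temperature": 0.7}
```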
@@ -117,67 +131,69 @@ def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
     Process chat request and generate Telemetry
     """
 
-    …
-    if self._args and len(self._args) > 0:
-        self._prompt = args[0]
+    scope = type("GenericScope", (), {})()
+    scope._instance = instance
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+    scope._server_address = server_address
+    scope._server_port = server_port
+    scope._kwargs = kwargs
+    scope._args = args
+
+    # Extract prompt from args or kwargs
+    if args and len(args) > 0:
+        scope._prompt = args[0]
     else:
-        …
+        scope._prompt = (
             kwargs.get("text_inputs") or
             (kwargs.get("image") and kwargs.get("question") and
-                …
+                ("image: " + kwargs.get("image") + " question:" + kwargs.get("question"))) or
             kwargs.get("fallback") or
             ""
         )
-    …
+    scope._prompt = format_content(scope._prompt)
+
+    # Process response based on task type
+    task = kwargs.get("task", "text-generation")
+
+    if task == "text-generation":
+        # Handle text generation responses
+        if isinstance(response, list) and len(response) > 0:
+            first_entry = response[0]
+            if isinstance(first_entry, dict):
+                if isinstance(first_entry.get("generated_text"), list):
+                    # Handle nested list format
+                    last_element = first_entry.get("generated_text")[-1]
+                    scope._completion = last_element.get("content", str(last_element))
+                else:
+                    # Handle standard format
+                    scope._completion = first_entry.get("generated_text", "")
+            else:
+                scope._completion = str(first_entry)
         else:
-            def extract_text(entry):
-                if isinstance(entry, dict):
-                    return entry.get("generated_text")
-                if isinstance(entry, list):
-                    return " ".join(
-                        extract_text(sub_entry) for sub_entry in entry if isinstance(sub_entry, dict)
-                    )
-                return ""
-
-            # Process and collect all generated texts
-            self._llmresponse = [
-                extract_text(entry) for entry in response_dict
-            ]
+            scope._completion = ""
 
-    …
+    elif task == "automatic-speech-recognition":
+        scope._completion = response.get("text", "") if isinstance(response, dict) else ""
 
-    elif …
+    elif task == "image-classification":
+        scope._completion = str(response[0]) if isinstance(response, list) and len(response) > 0 else ""
 
-    elif …
+    elif task == "visual-question-answering":
+        if isinstance(response, list) and len(response) > 0 and isinstance(response[0], dict):
+            scope._completion = response[0].get("answer", "")
+        else:
+            scope._completion = ""
+    else:
+        # Default handling for other tasks
+        scope._completion = format_content(response)
 
-    …
+    # Initialize timing attributes
+    scope._tbt = 0
+    scope._ttft = scope._end_time - scope._start_time
 
-    common_chat_logic(…
+    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
+        capture_message_content, disable_metrics, version, args, kwargs, is_stream=False)
 
     return response
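`type("GenericScope", (), {})()` creates an anonymous, attribute-writable object; functionally it is the same trick as the stdlib `types.SimpleNamespace`:

```python
from types import SimpleNamespace

scope = type("GenericScope", (), {})()  # ad-hoc attribute bag, as in the diff
scope._prompt = "hello"

ns = SimpleNamespace(_prompt="hello")   # stdlib equivalent of the same idea
assert scope._prompt == ns._prompt
```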
openlit/instrumentation/vllm/__init__.py

@@ -1,4 +1,3 @@
-# pylint: disable=useless-return, bad-staticmethod-argument, disable=duplicate-code
 """Initializer of Auto Instrumentation of vLLM Functions"""
 
 from typing import Collection

@@ -14,15 +13,15 @@ _instruments = ("vllm >= 0.5.4",)
 
 class VLLMInstrumentor(BaseInstrumentor):
     """
-    An instrumentor for vLLM
+    An instrumentor for vLLM client library.
     """
 
     def instrumentation_dependencies(self) -> Collection[str]:
         return _instruments
 
     def _instrument(self, **kwargs):
-        application_name = kwargs.get("application_name", "default_application")
-        environment = kwargs.get("environment", "default_environment")
+        application_name = kwargs.get("application_name", "default")
+        environment = kwargs.get("environment", "default")
         tracer = kwargs.get("tracer")
         metrics = kwargs.get("metrics_dict")
         pricing_info = kwargs.get("pricing_info", {})

@@ -30,14 +29,13 @@ class VLLMInstrumentor(BaseInstrumentor):
         disable_metrics = kwargs.get("disable_metrics")
         version = importlib.metadata.version("vllm")
 
-        # …
+        # Chat completions
         wrap_function_wrapper(
             "vllm.entrypoints.llm",
             "LLM.generate",
             generate(version, environment, application_name,
-                …
+                tracer, pricing_info, capture_message_content, metrics, disable_metrics),
         )
 
     def _uninstrument(self, **kwargs):
-        # Proper uninstrumentation logic to revert patched methods
         pass
openlit/instrumentation/vllm/utils.py

@@ -1,15 +1,15 @@
 """
-…
+vLLM OpenTelemetry instrumentation utility functions
 """
-
 import time
-
+
 from opentelemetry.trace import Status, StatusCode
+
 from openlit.__helpers import (
-    calculate_tbt,
-    get_chat_model_cost,
     general_tokens,
-    …
+    get_chat_model_cost,
+    common_span_attributes,
+    record_completion_metrics,
 )
 from openlit.semcov import SemanticConvention
 

@@ -24,77 +24,81 @@ def get_inference_config(args, kwargs):
         return args[1]
     return None
 
+def format_content(prompts):
+    """
+    Process a list of prompts to extract content.
+    """
+
+    if isinstance(prompts, str):
+        return prompts
+    elif isinstance(prompts, list):
+        return "\n".join(str(prompt) for prompt in prompts)
+    else:
+        return str(prompts)
+
 def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
     capture_message_content, disable_metrics, version, is_stream):
     """
     Process chat request and generate Telemetry
     """
 
-    …
+    request_model = scope._request_model
+
+    # Extract prompts and completions from vLLM response
+    input_tokens = 0
+    output_tokens = 0
+    prompt = ""
+    completion = ""
+
+    for output in scope._response:
+        prompt += output.prompt + "\n"
+        if output.outputs and len(output.outputs) > 0:
+            completion += output.outputs[0].text + "\n"
+        input_tokens += general_tokens(output.prompt)
+        output_tokens += general_tokens(output.outputs[0].text)
+
+    cost = get_chat_model_cost(request_model, pricing_info, input_tokens, output_tokens)
+
+    # Common Span Attributes
+    common_span_attributes(scope,
+        SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
+        scope._server_address, scope._server_port, request_model, request_model,
+        environment, application_name, is_stream, scope._tbt, scope._ttft, version)
+
+    # Span Attributes for Request parameters
     inference_config = get_inference_config(scope._args, scope._kwargs)
     if inference_config:
-        …
-            (…
-        …
-            value = getattr(inference_config, key, None)
-            if value is not None:
-                scope._span.set_attribute(attribute, value)
-
-    scope._span.set_attribute(SemanticConvention.GEN_AI_RESPONSE_MODEL, scope._request_model)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_MAX_TOKENS, getattr(inference_config, 'max_tokens', -1))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_STOP_SEQUENCES, getattr(inference_config, 'stop_sequences', []))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TEMPERATURE, getattr(inference_config, 'temperature', 1.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_P, getattr(inference_config, 'top_p', 1.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_TOP_K, getattr(inference_config, 'top_k', -1))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_PRESENCE_PENALTY,
+            getattr(inference_config, 'presence_penalty', 0.0))
+        scope._span.set_attribute(SemanticConvention.GEN_AI_REQUEST_FREQUENCY_PENALTY,
+            getattr(inference_config, 'frequency_penalty', 0.0))
 
+    # Span Attributes for Response parameters
     scope._span.set_attribute(SemanticConvention.GEN_AI_OUTPUT_TYPE, "text")
 
-    # …
-    scope._span.set_attribute(…
-    scope._span.set_attribute(…
-    scope._span.set_attribute(SemanticConvention.…
-    scope._span.set_attribute(SemanticConvention.…
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SERVER_TTFT, scope._ttft)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_SDK_VERSION, version)
-
-    input_tokens = 0
-    output_tokens = 0
-    cost = 0
+    # Span Attributes for Cost and Tokens
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE, input_tokens + output_tokens)
+    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
 
+    # Span Attributes for Content
     if capture_message_content:
-        prompt = ""
-        completion = ""
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_PROMPT, prompt)
+        scope._span.set_attribute(SemanticConvention.GEN_AI_CONTENT_COMPLETION, completion)
 
-        for output in scope._response:
-            prompt += output.prompt + "\n"
-            if output.outputs and len(output.outputs) > 0:
-                completion += output.outputs[0].text + "\n"
-            input_tokens += general_tokens(output.prompt)
-            output_tokens += general_tokens(output.outputs[0].text)
-
-        # Add a single event for prompt
+        # To be removed once the change to span_attributes (from span events) is complete
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_PROMPT_EVENT,
             attributes={
                 SemanticConvention.GEN_AI_CONTENT_PROMPT: prompt,
             },
         )
-
-        # Add a single event for completion
         scope._span.add_event(
             name=SemanticConvention.GEN_AI_CONTENT_COMPLETION_EVENT,
             attributes={

@@ -102,39 +106,14 @@ def common_chat_logic(scope, pricing_info, environment, application_name, metrics,
             },
         )
 
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_INPUT_TOKENS,
-        input_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_OUTPUT_TOKENS,
-        output_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_CLIENT_TOKEN_USAGE,
-        input_tokens + output_tokens)
-
-    # Calculate cost of the operation
-    cost = get_chat_model_cost(scope._request_model, pricing_info, input_tokens, output_tokens)
-    scope._span.set_attribute(SemanticConvention.GEN_AI_USAGE_COST, cost)
-
     scope._span.set_status(Status(StatusCode.OK))
 
-    metrics_attributes = …
-        request_model=scope._request_model,
-        server_address=scope._server_address,
-        server_port=scope._server_port,
-        response_model=scope._request_model,
-    )
-    metrics['genai_client_operation_duration'].record(scope._end_time - scope._start_time, metrics_attributes)
-    metrics['genai_server_tbt'].record(scope._tbt, metrics_attributes)
-    metrics['genai_server_ttft'].record(scope._ttft, metrics_attributes)
-    metrics['genai_requests'].add(1, metrics_attributes)
-    metrics['genai_completion_tokens'].add(output_tokens, metrics_attributes)
-    metrics['genai_prompt_tokens'].add(input_tokens, metrics_attributes)
-    metrics['genai_cost'].record(cost, metrics_attributes)
-    metrics['genai_client_usage_tokens'].record(
-        input_tokens + output_tokens, metrics_attributes)
+    # Metrics
+    if not disable_metrics:
+        record_completion_metrics(metrics, SemanticConvention.GEN_AI_OPERATION_TYPE_CHAT, SemanticConvention.GEN_AI_SYSTEM_VLLM,
+            scope._server_address, scope._server_port, request_model, request_model, environment,
+            application_name, scope._start_time, scope._end_time, input_tokens, output_tokens,
+            cost, scope._tbt, scope._ttft)
 
 def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
                           environment, application_name, metrics, start_time, span, args, kwargs,

@@ -142,20 +121,23 @@ def process_chat_response(instance, response, request_model, pricing_info, server_port, server_address,
     """
     Process chat request and generate Telemetry
     """
-    …
+
+    # Create scope object
+    scope = type("GenericScope", (), {})()
+
+    scope._response = response
+    scope._start_time = start_time
+    scope._end_time = time.time()
+    scope._span = span
+    scope._ttft, scope._tbt = scope._end_time - scope._start_time, 0
+    scope._server_address = server_address
+    scope._server_port = server_port
+    scope._request_model = request_model
+    scope._timestamps = []
+    scope._args = args
+    scope._kwargs = kwargs
+
+    common_chat_logic(scope, pricing_info, environment, application_name, metrics,
         capture_message_content, disable_metrics, version, is_stream=False)
 
     return response
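The accumulation loop in `common_chat_logic` above assumes the shape of `vllm.RequestOutput`: a `.prompt` string plus an `.outputs` list whose first element carries `.text`. A self-contained sketch with hypothetical stand-in classes (not the real vLLM types):

```python
class FakeCompletion:            # stands in for a vLLM completion output
    def __init__(self, text):
        self.text = text

class FakeRequestOutput:         # stands in for vllm.RequestOutput
    def __init__(self, prompt, texts):
        self.prompt = prompt
        self.outputs = [FakeCompletion(t) for t in texts]

response = [FakeRequestOutput("Say hi", ["hi there"])]
prompt, completion = "", ""
for output in response:          # same traversal as common_chat_logic()
    prompt += output.prompt + "\n"
    if output.outputs and len(output.outputs) > 0:
        completion += output.outputs[0].text + "\n"
print(prompt, completion)
```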
openlit/instrumentation/vllm/vllm.py

@@ -2,7 +2,6 @@
 Module for monitoring vLLM API calls.
 """
 
-import logging
 import time
 from opentelemetry.trace import SpanKind
 from openlit.__helpers import (

@@ -14,11 +13,8 @@ from openlit.instrumentation.vllm.utils import (
 )
 from openlit.semcov import SemanticConvention
 
-# Initialize logger for logging potential issues and operations
-logger = logging.getLogger(__name__)
-
-def generate(version, environment, application_name,
-    tracer, pricing_info, capture_message_content, metrics, disable_metrics):
+def generate(version, environment, application_name, tracer, pricing_info,
+    capture_message_content, metrics, disable_metrics):
     """
     Generates a telemetry wrapper for GenAI function call
     """

@@ -27,7 +23,6 @@ def generate(version, environment, application_name,
         """
         Wraps the GenAI function call.
         """
-
         server_address, server_port = set_server_address_and_port(instance, "http://127.0.0.1", 443)
         request_model = instance.llm_engine.model_config.model or "facebook/opt-125m"
 

@@ -56,9 +51,9 @@ def generate(version, environment, application_name,
                 disable_metrics=disable_metrics,
                 version=version,
             )
+
         except Exception as e:
             handle_exception(span, e)
-            logger.error("Error in trace creation: %s", e)
 
         return response
 
openlit-1.34.14.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openlit
-Version: 1.34.12
+Version: 1.34.14
 Summary: OpenTelemetry-native Auto instrumentation library for monitoring LLM Applications and GPUs, facilitating the integration of observability into your GenAI-driven projects
 License: Apache-2.0
 Keywords: OpenTelemetry,otel,otlp,llm,tracing,openai,anthropic,claude,cohere,llm monitoring,observability,monitoring,gpt,Generative AI,chatGPT,gpu
openlit-1.34.14.dist-info/RECORD

@@ -125,20 +125,20 @@ openlit/instrumentation/together/__init__.py,sha256=0UmUqQtppyK3oopb4lTjX2LITgVC…
 openlit/instrumentation/together/async_together.py,sha256=0-h5fKw6rIwN_fvWVpGuvVqizIuM9xFCzz8Z4oGgOj0,6822
 openlit/instrumentation/together/together.py,sha256=nY6mzHmHgoMbbnB_9eL0EBQjP0ltJVdkQj4pbamHAj0,6723
 openlit/instrumentation/together/utils.py,sha256=n7r_pM_sqFnJEAkL7OhPydr0Uct0A74vXdcYELdbeW0,14368
-openlit/instrumentation/transformers/__init__.py,sha256=…
-openlit/instrumentation/transformers/transformers.py,sha256=…
-openlit/instrumentation/transformers/utils.py,sha256=…
+openlit/instrumentation/transformers/__init__.py,sha256=hXq0WUZNl6Sz0Ihk29kA9i8Q1j0e1URFb7v7etnQpxI,1511
+openlit/instrumentation/transformers/transformers.py,sha256=MHnHVo_6NP0gSIqxen6qQpCrZ0fs8Ec80EdZumMpVNo,1797
+openlit/instrumentation/transformers/utils.py,sha256=MMy_SyRyDI4X-0mqbBwStac0xabmw0ZRvv_VWLA_Nkg,8426
 openlit/instrumentation/vertexai/__init__.py,sha256=mT28WCBvQfRCkAWGL6bd0EjEPHvMjaNcz6T3jsLZh8k,3745
 openlit/instrumentation/vertexai/async_vertexai.py,sha256=-kpg-eiL76O5_XopUPghCYwJHf0Nrxi00_Z5tCwq6zM,23086
 openlit/instrumentation/vertexai/vertexai.py,sha256=5NB090aWlm9DnlccNNLRO6A97P_RN-JnHb5JS01tYyw,23000
-openlit/instrumentation/vllm/__init__.py,sha256=…
-openlit/instrumentation/vllm/utils.py,sha256=…
-openlit/instrumentation/vllm/vllm.py,sha256=…
+openlit/instrumentation/vllm/__init__.py,sha256=uaSzQmgDuKJ-sh61sfVdzVt2qAZaozZIQ8sbmQ0XpZE,1357
+openlit/instrumentation/vllm/utils.py,sha256=HuCPNBgChWg9vA7DHNFCij_y8qj27DjZxdZ0Nvdt2fg,5751
+openlit/instrumentation/vllm/vllm.py,sha256=VzazF2f4LLwjZDO_G8lIN_d622oSJM0fIO9wjxXbhyg,2004
 openlit/otel/events.py,sha256=VrMjTpvnLtYRBHCiFwJojTQqqNpRCxoD4yJYeQrtPsk,3560
 openlit/otel/metrics.py,sha256=GM2PDloBGRhBTkHHkYaqmOwIAQkY124ZhW4sEqW1Fgk,7086
 openlit/otel/tracing.py,sha256=tjV2bEbEDPUB1Z46gE-UsJsb04sRdFrfbhIDkxViZc0,3103
 openlit/semcov/__init__.py,sha256=ptyo37PY-FHDx_PShEvbdns71cD4YvvXw15bCRXKCKM,13461
-openlit-1.34.12.dist-info/LICENSE,sha256=…
-openlit-1.34.12.dist-info/METADATA,sha256=…
-openlit-1.34.12.dist-info/WHEEL,sha256=…
-openlit-1.34.12.dist-info/RECORD,,
+openlit-1.34.14.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+openlit-1.34.14.dist-info/METADATA,sha256=qaOh__y9R5tT0z7qveai1LH4KWY7ampN3PzVhhm7D0M,23470
+openlit-1.34.14.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+openlit-1.34.14.dist-info/RECORD,,

{openlit-1.34.12.dist-info → openlit-1.34.14.dist-info}/LICENSE: file without changes
{openlit-1.34.12.dist-info → openlit-1.34.14.dist-info}/WHEEL: file without changes